Ejemplo n.º 1
0
def file_to_specgram(path, specgrammode=None):
	"""Read a mono sound file and return its spectrogram as (magnitudes, phasifiers).

	path          -- sound file to read; must exist, be mono and have sample rate `fs`.
	specgrammode  -- only None (the "normal" spectrogram) is recognised.

	Returns a tuple (mags, phasifiers): `mags` holds the natural log of the
	magnitudes of the selected frequency bins, `phasifiers` holds each complex
	bin divided by its magnitude.

	Raises ValueError for an unrecognised mode, a mismatched fftsize or a
	missing file, and `Error` for a multi-channel file or wrong sample rate.
	"""
	if specgrammode is not None:
		raise ValueError("specgrammode not recognised: %s" % specgrammode)
	if fftsize != framelen:
		raise ValueError("this mode requires normal fftsize")
	if not os.path.isfile(path):
		raise ValueError("path %s not found" % path)
	sf = Sndfile(path, "r")
	try:
		if sf.channels != 1:
			raise Error("ERROR in spemptk: sound file has multiple channels (%i) - mono audio required." % sf.channels)
		if sf.samplerate != fs:
			raise Error("ERROR in spemptk: wanted srate %g - got %g." % (fs, sf.samplerate))
		chunksize = 4096
		# Seed with an empty float64 array so the joined signal keeps the
		# float64 dtype the incremental hstack used to produce.
		chunks = [np.array([])]
		while True:
			try:
				chunks.append(sf.read_frames(chunksize, dtype=np.float32))
			except RuntimeError:
				# The reader signals "no more full chunks" by raising.
				# NOTE(review): a trailing partial chunk is presumably dropped here - confirm.
				break
	finally:
		# Release the file handle even when validation or reading fails.
		sf.close()
	# Join once instead of re-copying the whole signal for every chunk.
	pcm = np.hstack(chunks)
	spec = stft(pcm).T
	spec = spec[specfreqbinrange[0]:specfreqbinrange[1],:]
	mags = abs(spec)
	phasifiers = spec / mags
	mags = np.log(mags)
	return (mags, phasifiers)
Ejemplo n.º 2
0
def file_to_specgram(path, specgrammode=None):
    """Read a mono sound file and return its spectrogram as (magnitudes, phasifiers).

    path          -- sound file to read; must exist, be mono and have sample rate `fs`.
    specgrammode  -- only None (the "normal" spectrogram) is recognised.

    Returns a tuple (mags, phasifiers): `mags` holds the natural log of the
    magnitudes of the selected frequency bins, `phasifiers` holds each complex
    bin divided by its magnitude.

    Raises ValueError for an unrecognised mode, a mismatched fftsize or a
    missing file, and `Error` for a multi-channel file or wrong sample rate.
    """
    if specgrammode is not None:
        raise ValueError("specgrammode not recognised: %s" % specgrammode)
    if fftsize != framelen:
        raise ValueError("this mode requires normal fftsize")
    if not os.path.isfile(path):
        raise ValueError("path %s not found" % path)
    sf = Sndfile(path, "r")
    try:
        if sf.channels != 1:
            raise Error(
                "ERROR in spemptk: sound file has multiple channels (%i) - mono audio required."
                % sf.channels)
        if sf.samplerate != fs:
            raise Error("ERROR in spemptk: wanted srate %g - got %g." %
                        (fs, sf.samplerate))
        chunksize = 4096
        # Seed with an empty float64 array so the joined signal keeps the
        # float64 dtype the incremental hstack used to produce.
        chunks = [np.array([])]
        while True:
            try:
                chunks.append(sf.read_frames(chunksize, dtype=np.float32))
            except RuntimeError:
                # The reader signals "no more full chunks" by raising.
                # NOTE(review): a trailing partial chunk is presumably dropped here - confirm.
                break
    finally:
        # Release the file handle even when validation or reading fails.
        sf.close()
    # Join once instead of re-copying the whole signal for every chunk.
    pcm = np.hstack(chunks)
    spec = stft(pcm).T
    spec = spec[specfreqbinrange[0]:specfreqbinrange[1], :]
    mags = abs(spec)
    phasifiers = spec / mags
    mags = np.log(mags)
    return (mags, phasifiers)
Ejemplo n.º 3
0
def read_sound(fp):
    """
    Create a float64 array scaled by its maximum, plus the sample rate, from
    an audio file.

    The decoder is chosen from the file name suffix:
      * 'mp3' - decoded to 'temp.wav' with the external `lame` tool; if that
        fails, moviepy is tried as a fallback;
      * 'aif' - read with Sndfile;
      * 'wav' - read with `wav.read`.

    For multi-channel data only the first channel is kept.

    Returns (data, samplerate).
    Raises ValueError when the suffix is not supported or an mp3 cannot be
    decoded by either backend.
    """
    if fp.endswith('mp3'):
        try:
            oname = 'temp.wav'
            result = subprocess.call(['lame', '--decode', fp, oname])
            # Explicit check: an assert would be stripped under -O.
            if result != 0:
                raise RuntimeError("lame exited with status %s" % result)
            samplerate, data = wav.read(oname)
        except Exception:
            print("couldn't run lame")
            try:
                import moviepy.editor as mpy
                aud_clip = mpy.AudioFileClip(fp)
                samplerate = aud_clip.fps
                data = aud_clip.to_soundarray()
            except Exception:
                print("moviepy not installed?")
                # Fail here with a clear message instead of hitting an
                # unbound `data` further down.
                raise ValueError("could not decode %s with lame or moviepy" % fp)
    elif fp.endswith('aif'):
        sf = Sndfile(fp, 'r')
        try:
            sf.seek(0)
            data = sf.read_frames(sf.nframes)
            samplerate = sf.samplerate
        finally:
            sf.close()
    elif fp.endswith('wav'):
        samplerate, data = wav.read(fp)
    else:
        raise ValueError("unsupported audio file type: %s" % fp)

    if len(data.shape) > 1:
        data = data[:, 0]  # keep the first channel only
    data = data.astype('float64')
    peak = data.max()
    if peak != 0:
        # Skip scaling when the maximum is zero to avoid dividing by zero.
        data /= peak
    return data, samplerate
Ejemplo n.º 4
0
    def __create_feature(self, input_path, speaker_name, feature_filename,
                         mode):
        """Extract cepstral features from one audio file and store them as HDF5.

        input_path       -- audio file to read.
        speaker_name     -- sub-directory of `self.features_rootpath` that
                            receives the output; created when missing.
        feature_filename -- name of the HDF5 file written inside that directory.
        mode             -- not used by this method.
        """
        speaker_featurepath = os.path.join(self.features_rootpath,
                                           speaker_name)
        if not os.path.exists(speaker_featurepath):
            os.mkdir(speaker_featurepath)

        output_path = os.path.join(speaker_featurepath, feature_filename)

        f = Sndfile(input_path)
        try:
            rate = f.samplerate
            data = f.read_frames(f.nframes)
        finally:
            # Release the handle as soon as the samples are in memory.
            f.close()
        # Scale samples by 2**15.
        # NOTE(review): presumably maps [-1, 1] floats to the 16-bit range the
        # bob tools expect - confirm.
        original_data = data * pow(2, 15)

        extractor = bob.bio.spear.extractor.Cepstral(win_length_ms=25,
                                                     n_filters=27,
                                                     n_ceps=13,
                                                     with_energy=False,
                                                     mel_scale=True,
                                                     features_mask=np.arange(
                                                         0, 39))
        preprocessor = bob.bio.spear.preprocessor.Energy_Thr()
        # Only the third element returned by the preprocessor (labels) is used.
        __, __, labels = preprocessor((rate, original_data))
        feature = extractor([rate, original_data, labels])

        out_file = bob.io.base.HDF5File(output_path, 'w')
        try:
            extractor.write_feature(feature, out_file)
        finally:
            # Close the output even when writing fails.
            out_file.close()
    def file_to_features(self, wavpath):
        """Read a sound file frame by frame and return one MFCC vector per frame.

        Each frame of `framelen` samples is Hamming-windowed, transformed with
        an FFT and passed through `self.mfccMaker`; the zeroth cepstral
        coefficient is dropped and at most the next 13 are kept.
        Multi-channel frames are averaged down to one channel.

        Returns a numpy array built from the per-frame feature vectors.
        """
        sf = Sndfile(wavpath, "r")
        try:
            window = np.hamming(framelen)
            features = []
            while True:
                try:
                    chunk = sf.read_frames(framelen, dtype=np.float32)
                    if len(chunk) != framelen:
                        print("Not read sufficient samples - returning")
                        break
                    if sf.channels != 1:
                        chunk = np.mean(chunk, 1)  # mixdown
                    framespectrum = np.fft.fft(window * chunk)
                    # Floor division keeps the slice bound an integer under
                    # true division as well.
                    magspec = abs(framespectrum[:framelen // 2])

                    # do the frequency warping and MFCC computation
                    melSpectrum = self.mfccMaker.warpSpectrum(magspec)
                    melCepstrum = self.mfccMaker.getMFCCs(melSpectrum, cn=True)
                    melCepstrum = melCepstrum[1:]  # exclude zeroth coefficient
                    melCepstrum = melCepstrum[:13]  # limit to lower MFCCs
                    features.append(melCepstrum)
                except RuntimeError:
                    # A RuntimeError from the loop body ends the read
                    # (presumably end of file - confirm against the reader).
                    break
        finally:
            # Always release the file handle, including on unexpected errors.
            sf.close()
        return np.array(features)
Ejemplo n.º 6
0
    def read_wav(self, sample_path):
        """Load a sample file and store its frames and format flags on self.

        sample_path -- path appended to the module-level `cwd` prefix.

        Reads every frame as float32, optionally reports and plays the sample
        depending on `OPTIONS`, then calls `self.update` with the sample data
        and type information.

        Raises ValueError for files that are neither mono nor stereo, because
        no `sample_type` value is defined for them.
        """
        sample = Sndfile(cwd + sample_path, 'r')
        try:
            sampling_rate = sample.samplerate
            channels = sample.channels
            encoding = sample.encoding
            frames_count = sample.nframes
            frames = sample.read_frames(frames_count, dtype=np.float32)
        finally:
            # Close the handle even when reading fails.
            sample.close()

        if channels == 1:
            text_type = 'mono'
            sample_type = 0
        elif channels == 2:
            text_type = 'stereo'
            sample_type = 0b01100100
        else:
            # Without this, `sample_type` stayed unbound and the update below
            # failed with a NameError.
            raise ValueError(
                'unsupported channel count: {0}-channels'.format(channels))

        if OPTIONS['verbose'] > 1:
            # Joined by single spaces, matching the output of a
            # comma-separated print statement.
            report = ("*", encoding, text_type, 'sample "', sample_path, '"',
                      4 * frames_count, 'kB')
            print(" ".join(str(part) for part in report))

        if OPTIONS['play_sound']:
            play(frames.astype(np.float64).T, sampling_rate)

        # NOTE(review): 'channels' is stored as 2 whatever the file contains -
        # confirm this is what the consumer expects.
        self.update({
            'sample_data': frames,
            'sample_type': sample_type,
            'channels': 2,
            'sample_bittype': 4
        })
Ejemplo n.º 7
0
	def file_to_features(self, wavpath):
		"""Read through a WAV file, converting each frame to the required features.

		Each frame of `framelen` samples is Hamming-windowed, transformed with
		an FFT and passed through `self.mfccMaker`; the zeroth cepstral
		coefficient is dropped and at most the next 13 are kept.
		Multi-channel frames are mixed down to mono by averaging.

		Returns a 2D array (one row per frame).
		Raises ValueError when the file is missing or its sample rate is not `fs`.
		"""
		if verbose: print("Reading %s" % wavpath)
		if not os.path.isfile(wavpath): raise ValueError("path %s not found" % wavpath)
		sf = Sndfile(wavpath, "r")
		try:
			if sf.samplerate != fs:
				raise ValueError("wanted sample rate %g - got %g." % (fs, sf.samplerate))
			window = np.hamming(framelen)
			features = []
			while True:
				try:
					chunk = sf.read_frames(framelen, dtype=np.float32)
					if len(chunk) != framelen:
						print("Not read sufficient samples - returning")
						break
					if sf.channels != 1:
						chunk = np.mean(chunk, 1) # mixdown
					framespectrum = np.fft.fft(window * chunk)
					# Floor division keeps the slice bound an integer under
					# true division as well.
					magspec = abs(framespectrum[:framelen // 2])

					# do the frequency warping and MFCC computation
					melSpectrum = self.mfccMaker.warpSpectrum(magspec)
					melCepstrum = self.mfccMaker.getMFCCs(melSpectrum, cn=True)
					melCepstrum = melCepstrum[1:]   # exclude zeroth coefficient
					melCepstrum = melCepstrum[:13] # limit to lower MFCCs

					framefeatures = melCepstrum   # todo: include deltas? that can be your homework.

					features.append(framefeatures)
				except RuntimeError:
					# A RuntimeError from the loop body ends the read
					# (presumably end of file - confirm against the reader).
					break
		finally:
			# Close the handle on every path, including the sample-rate error.
			sf.close()
		return np.array(features)
Ejemplo n.º 8
0
def load_pcm(path):
    """Read a whole sound file and return (pcm, samplerate).

    For files with more than one channel only the first channel is returned.
    """
    wave = Sndfile(path, "r")
    try:
        pcm = wave.read_frames(wave.nframes)
        # Read the metadata while the file is still open.
        channels = wave.channels
        samplerate = wave.samplerate
    finally:
        wave.close()
    # Compare by value: `is not 1` tests object identity, which is not
    # guaranteed to hold for equal integers.
    if channels != 1:
        pcm = pcm[:, 0]
    return (pcm, samplerate)
    def file_to_features(self,wavpath):
        """Read a sound file frame by frame and return one MFCC vector per frame.

        Each frame of `framelen` samples is Hamming-windowed, transformed with
        an FFT and passed through `self.mfccMaker`; the zeroth cepstral
        coefficient is dropped and at most the next 13 are kept.
        Multi-channel frames are averaged down to one channel.

        Returns a numpy array built from the per-frame feature vectors.
        """
        sf = Sndfile(wavpath, "r")
        try:
            window = np.hamming(framelen)
            features = []
            while True:
                try:
                    chunk = sf.read_frames(framelen, dtype=np.float32)
                    if len(chunk) != framelen:
                        print("Not read sufficient samples - returning")
                        break
                    if sf.channels != 1:
                        chunk = np.mean(chunk, 1) # mixdown
                    framespectrum = np.fft.fft(window * chunk)
                    # Floor division keeps the slice bound an integer under
                    # true division as well.
                    magspec = abs(framespectrum[:framelen // 2])

                    # do the frequency warping and MFCC computation
                    melSpectrum = self.mfccMaker.warpSpectrum(magspec)
                    melCepstrum = self.mfccMaker.getMFCCs(melSpectrum, cn=True)
                    melCepstrum = melCepstrum[1:]   # exclude zeroth coefficient
                    melCepstrum = melCepstrum[:13] # limit to lower MFCCs
                    features.append(melCepstrum)
                except RuntimeError:
                    # A RuntimeError from the loop body ends the read
                    # (presumably end of file - confirm against the reader).
                    break
        finally:
            # Always release the file handle, including on unexpected errors.
            sf.close()
        return np.array(features)
Ejemplo n.º 10
0
def extractData(file_names):
    """Build MFCC features and character targets for a set of TIMIT files.

    file_names -- mapping from a directory string to the audio file names in
                  it. The first character of each joined path is replaced by
                  the "./TIMIT" prefix, and the transcript is the file with
                  the same stem and a ".TXT" extension.

    Returns (data, targets): `data` is a list of scaled MFCC arrays, `targets`
    a list of integer arrays where 0 encodes a space and letters are encoded
    as their offset from 'a' plus one.
    """
    data = []
    targets = []
    for k, v in file_names.items():
        for f_name in v:
            source_fname = k + "/" + f_name
            target_fname = k + "/" + f_name.split(".")[0] + ".TXT"
            source_fname = "./TIMIT" + source_fname[1:]
            target_fname = "./TIMIT" + target_fname[1:]

            audio_file = Sndfile(source_fname, "r")
            try:
                sr = audio_file.samplerate
                audio = audio_file.read_frames(audio_file.nframes)
            finally:
                # Release the handle even when reading fails.
                audio_file.close()
            datum = mfcc(audio, samplerate=sr, nfilt=64, numcep=40)
            datum = preprocessing.scale(datum)
            data.append(datum)

            with open(target_fname, "r") as text_file:
                # Lower-case, drop full stops and skip the first two
                # whitespace-separated fields of the transcript line.
                target_txt = ' '.join(text_file.read().lower().strip().replace(
                    ".", "").split()[2:])
                # Build a string explicitly: filter() on a str only returns a
                # str on Python 2, and .replace() below needs one.
                target_txt = ''.join(
                    ch for ch in target_txt if ch not in special_chars)
                # Doubling the spaces makes split(' ') yield an empty string
                # between words, which marks the word boundary.
                target_txt = target_txt.replace(' ', '  ').split(' ')
                target = np.hstack(
                    ['<space>' if x == '' else list(x) for x in target_txt])
                target = np.asarray(
                    [0 if x == '<space>' else ord(x) - (ord('a') - 1)
                     for x in target])
                targets.append(target)
    return data, targets
Ejemplo n.º 11
0
	def file_to_features(self, wavpath):
		"""Read through a WAV file, converting each frame to the required features.

		Each frame of `framelen` samples is Hamming-windowed, transformed with
		an FFT and passed through `self.mfccMaker`; the zeroth cepstral
		coefficient is dropped and at most the next 13 are kept.
		Multi-channel frames are mixed down to mono by averaging.

		Returns a 2D array (one row per frame).
		Raises ValueError when the file is missing or its sample rate is not `fs`.
		"""
		if verbose: print("Reading %s" % wavpath)
		if not os.path.isfile(wavpath): raise ValueError("path %s not found" % wavpath)
		sf = Sndfile(wavpath, "r")
		try:
			if sf.samplerate != fs:
				raise ValueError("wanted sample rate %g - got %g." % (fs, sf.samplerate))
			window = np.hamming(framelen)
			features = []
			while True:
				try:
					chunk = sf.read_frames(framelen, dtype=np.float32)
					if len(chunk) != framelen:
						print("Not read sufficient samples - returning")
						break
					if sf.channels != 1:
						chunk = np.mean(chunk, 1) # mixdown
					framespectrum = np.fft.fft(window * chunk)
					# Floor division keeps the slice bound an integer under
					# true division as well.
					magspec = abs(framespectrum[:framelen // 2])

					# do the frequency warping and MFCC computation
					melSpectrum = self.mfccMaker.warpSpectrum(magspec)
					melCepstrum = self.mfccMaker.getMFCCs(melSpectrum, cn=True)
					melCepstrum = melCepstrum[1:]   # exclude zeroth coefficient
					melCepstrum = melCepstrum[:13] # limit to lower MFCCs

					framefeatures = melCepstrum   # todo: include deltas? that can be your homework.

					features.append(framefeatures)
				except RuntimeError:
					# A RuntimeError from the loop body ends the read
					# (presumably end of file - confirm against the reader).
					break
		finally:
			# Close the handle on every path, including the sample-rate error.
			sf.close()
		return np.array(features)
Ejemplo n.º 12
0
    class AudioWriter:
        """Writes runs of samples into numbered FLAC files.

        Output files are named "<baseFilename>.<syllableIndex>.flac"; the index
        advances each time an open file is closed.
        """
        syllableIndex = 0
        baseFilename = "syllable"
        fileOpen = False
        format = Format('flac', 'pcm24')
        f = None
        filecount = 0

        def open(self):
            """Open the output file for the current syllable index."""
            target = ".".join((self.baseFilename, str(self.syllableIndex), 'flac'))
            self.f = Sndfile(target, 'w', self.format, 1, 44100)
            self.fileOpen = True

        def close(self):
            """Close the current file, if any, and advance the syllable index."""
            if not self.fileOpen:
                return
            self.f.close()
            self.syllableIndex += 1
            self.fileOpen = False

        def write(self, data):
            """Write frames to the current file, opening one first when needed."""
            if not self.fileOpen:
                self.open()
            self.f.write_frames(data)

        def parseData(self, data):
            """Split `data` into segments and write each to its own file.

            A segment ends at the second-to-last index, or wherever the sample
            and its successor both equal `zero_val`. The sample at a boundary
            index is not written.
            """
            pending = []
            final_index = len(data) - 2
            for idx in range(len(data) - 1):
                at_boundary = idx == final_index or (
                    data[idx] == zero_val and data[idx + 1] == zero_val)
                if not at_boundary:
                    pending.append(data[idx])
                    continue
                if pending:
                    self.write(np.array(pending))
                    self.filecount += 1
                    pending = []
                self.close()
Ejemplo n.º 13
0
def get_fft_points(sound_filename, fps, fft_pixels, rate = 1, fourierwidth = 0.3):
	"""Compute FFT spectra for consecutive slices of a sound file.

	Generates `rate` points per video frame. Based on the script from
	http://classicalconvert.com/2008/04/
	how-to-visualize-music-using-animated-spectrograms-with
	-open-source-everything/

	sound_filename -- file to analyse.
	fps            -- video frames per second.
	fft_pixels     -- number of spectrum values kept per point; positions
	                  beyond the FFT length are filled with 0.0.
	rate           -- points generated per video frame.
	fourierwidth   -- not used by this function.

	Returns a list with one list of `fft_pixels` real parts per slice.
	Raises ValueError when the sample rate is too low to give each point at
	least one audio frame.
	"""
	f = Sndfile(sound_filename, 'r')
	try:
		# Audio frames per point, truncated to an integer so it can be used
		# as a read length.
		divisor = int(f.samplerate / (rate * fps))
		if divisor < 1:
			# A zero-length read would never advance and the loop below
			# would spin forever.
			raise ValueError(
				"sample rate %s is too low for %s points per second"
				% (f.samplerate, rate * fps))
		total = f.nframes
		points = []
		framepos = 0
		while framepos < total:
			read_len = min(divisor, total - framepos)
			frames = f.read_frames(read_len)
			if len(frames) == 0:
				break  # nothing more to read; avoid looping without progress
			buff = []
			for frame in frames:
				# is frame iterable or just one chan?
				if getattr(frame, '__iter__', False):
					fval = sum(frame) / len(frame)
				else:
					fval = frame
				buff.append(fval)
			# TODO: trim to 1024 or so?
			outfft = fft(buff)
			spectrum = [
				(outfft[y].real
					if y < len(outfft) else 0.0)
				for y in range(fft_pixels)]
			points.append(spectrum)
			framepos += len(frames)
	finally:
		# Close the file on every path, including errors.
		f.close()
	return points
Ejemplo n.º 14
0
    def __init__(self,
                 fn,
                 rate=None,
                 pad_start=0,
                 seek=None,
                 duration=None,
                 rotation=None):
        """Open an audio file for reading, converting it to WAV when needed.

        fn        -- input file. Files not ending in '.wav', or whose sample
                     rate differs from `rate`, are converted to a temporary
                     WAV under /tmp/ via `convert2wav`.
        rate      -- required sample rate, or None to accept the file's own.
        pad_start -- stored as `self.pad`.
        seek      -- start offset in seconds; skipped by reading and
                     discarding that many frames.
        duration  -- upper bound in seconds on the duration exposed.
        rotation  -- rotation angle in radians, in [-pi, pi); requires more
                     than two channels and builds a 4x4 rotation matrix.
        """
        # Probe the original file only to learn whether conversion is needed.
        fp = Sndfile(fn, 'r') if fn.endswith('.wav') else None
        needs_conversion = fp is None or (rate is not None
                                          and fp.samplerate != rate)
        if fp is not None:
            # The file is reopened below, so do not leave the probe handle open.
            fp.close()

        if needs_conversion:
            # Convert to wav file
            if not os.path.isdir('/tmp/'):
                os.makedirs('/tmp/')
            snd_file = tempfile.NamedTemporaryFile('w',
                                                   prefix='/tmp/',
                                                   suffix='.wav',
                                                   delete=False)
            snd_file.close()

            convert2wav(fn, snd_file.name, rate)
            self.snd_fn = snd_file.name
            self.rm_flag = True  # marks snd_fn as a temporary file
        else:
            self.snd_fn = fn
            self.rm_flag = False

        self.fp = Sndfile(self.snd_fn, 'r')
        self.num_channels = self.fp.channels
        self.rate = self.fp.samplerate
        self.num_frames = self.fp.nframes
        self.duration = self.num_frames / float(self.rate)

        self.k = 0
        self.pad = pad_start

        if seek is not None and seek > 0:
            num_frames = int(seek * self.rate)
            self.fp.read_frames(num_frames)  # discard frames before the offset
        else:
            seek = 0

        if duration is not None:
            self.duration = min(duration, self.duration - seek)
            self.num_frames = int(self.duration * self.rate)

        if rotation is not None:
            assert self.num_channels > 2  # Spatial audio
            assert -np.pi <= rotation < np.pi
            c = np.cos(rotation)
            s = np.sin(rotation)
            rot_mtx = np.array([
                [1, 0, 0, 0],  # W' = W
                [0, c, 0, s],  # Y' = X sin + Y cos
                [0, 0, 1, 0],  # Z' = Z
                [0, -s, 0, c]
            ])  # X' = X cos - Y sin
            self.rot_mtx = rot_mtx
        else:
            self.rot_mtx = None
def test_read_wave():
    """Show a spectrogram of the first 46797 frames of ../fcjf0/sa1.wav."""
    wav_handle = Sndfile("../fcjf0/sa1.wav", 'r')
    samples = np.array(wav_handle.read_frames(46797))
    pyplot.figure()
    pyplot.specgram(samples)
    pyplot.show()
Ejemplo n.º 16
0
    def __init__(self, file_name):
        """Open `file_name` and cache its metadata and all of its frames.

        Sets `sf` (the open handle), `file_format`, `nchans`, `sr`, `length`
        (number of frames) and `audio` (every frame of the file).
        """
        handle = Sndfile(file_name)
        self.sf = handle

        self.file_format = handle.format
        self.nchans = handle.channels
        self.sr = handle.samplerate
        self.length = handle.nframes
        self.audio = handle.read_frames(self.length)
Ejemplo n.º 17
0
 def writeWAV(self, data, filename):
     """Write `data` to `filename` as a WAV file at `self.samplingRate`.

     A 1-D array is written as mono. A 2-D array is written with one channel
     per column, so stereo input behaves as before and other channel counts
     no longer clash with a hard-coded 2.
     """
     channels = data.shape[1] if len(data.shape) == 2 else 1
     f = Sndfile(filename, 'w', Format('wav'), channels, self.samplingRate)
     try:
         f.write_frames(data)
     finally:
         # Close the file even when the write fails.
         f.close()
Ejemplo n.º 18
0
def downsample(fs, sig):
    """Resample `sig` from rate `fs` to `SAMPLE_RATE` using sox.

    The signal is zero-padded to a whole number of frames of
    `fs * WINDOW_SIZE` samples, written to a temporary WAV file, run through
    the sox "rate" effect and read back.

    Returns the resampled signal. Both temporary files are removed even when
    a step fails.
    """
    in_file = random_string() + ".wav"
    out_file = random_string() + ".wav"

    frame_len = fs * WINDOW_SIZE
    pad = len(sig) % frame_len
    if pad > 0:
        # np.zeros needs an integer length; frame_len may be a float.
        sig = np.append(sig, np.zeros(int(frame_len - pad)))

    try:
        f = Sndfile(in_file, 'w',
                    Format(type="wav", encoding='pcm16', endianness="file"),
                    1, fs)
        try:
            f.write_frames(sig)
        finally:
            f.close()

        sox_in = pysox.CSoxStream(in_file)
        sox_out = pysox.CSoxStream(out_file, 'w',
                                   pysox.CSignalInfo(SAMPLE_RATE, 1, 8),
                                   fileType='wav')
        sox_chain = pysox.CEffectsChain(sox_in, sox_out)
        sox_chain.add_effect(pysox.CEffect("rate", [str(SAMPLE_RATE)]))
        sox_chain.flow_effects()
        sox_out.close()

        f = Sndfile(out_file, 'r')
        try:
            sig = f.read_frames(f.nframes)
        finally:
            f.close()
    finally:
        # Clean up whichever temporary files were created.
        for tmp_path in (in_file, out_file):
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)

    return sig
Ejemplo n.º 19
0
    def _test_int_io(self, dt):
        """Round-trip random integers of dtype `dt` through a WAV file.

        Writes one second (22050 frames) of random values and asserts that
        reading them back with the same dtype returns identical data.
        """
        # TODO: check if neg or pos value is the highest in abs
        rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
        try:
            # Use almost full possible range possible for the given data-type
            nb = 2 ** (8 * np.dtype(dt).itemsize - 3)
            fs = 22050
            nbuff = fs
            # randint excludes its upper bound, so nb + 1 keeps the inclusive
            # [-nb, nb] range of the deprecated random_integers.
            a = np.random.randint(-nb, nb + 1, nbuff)
            a = a.astype(dt)

            # Open the file for writing
            wav_format = Format('wav', _DTYPE_TO_ENC[dt])
            b = Sndfile(fd, 'w', wav_format, 1, fs)

            b.write_frames(a)
            b.close()

            b = Sndfile(cfilename, 'r')

            read_a = b.read_frames(nbuff, dtype=dt)
            b.close()

            assert_array_equal(a, read_a)

        finally:
            close_tmp_file(rfd, cfilename)
Ejemplo n.º 20
0
    def test_bad_wavread(self):
        """ Check wavread on bad file"""
        # Create a tmp audio file with non wav format, write some random data into it,
        # and check it can not be opened by wavread
        rfd, fd, cfilename   = open_tmp_file('pysndfiletest.wav')
        try:
            nbuff = 22050
            noise = 0.1 * N.random.randn(nbuff)

            # Open the copy file for writing
            aiff_format = audio_format('aiff', 'pcm16')
            b = Sndfile(cfilename, 'w', aiff_format, 1, nbuff)

            b.write_frames(noise)

            b.close()

            # Check the file is readable through Sndfile itself.
            b = Sndfile(cfilename, 'r')
            b.read_frames(nbuff)
            b.close()

            try:
                wavread(cfilename)[0]
            except ValueError:
                # Expected: wavread must reject a non-wav file.
                pass
            else:
                raise Exception("wavread on non wav file succeded, expected to fail")

        finally:
            close_tmp_file(rfd, cfilename)
def load_sound(filename):
    """
    Load a sound file and return its frames together with the sample rate.

    INFO: The values are normalized between -1 and 1
    :param filename: path of the sound file to read
    :return: tuple (data, samplerate); data is a float64 numpy array with
             (sound_length, channels) shape
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes, dtype=np.float64)
        samplerate = f.samplerate
    finally:
        # Do not leave the file handle open after loading.
        f.close()
    return data, samplerate
def load_sound(filename):
    """
    Load a sound file and return its frames together with the sample rate.

    INFO: The values are normalized between -1 and 1
    :param filename: path of the sound file to read
    :return: tuple (data, samplerate); data is a float64 numpy array with
             (sound_length, channels) shape
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes, dtype=np.float64)
        samplerate = f.samplerate
    finally:
        # Do not leave the file handle open after loading.
        f.close()
    return data, samplerate
Ejemplo n.º 23
0
def load(filename):
    """Load an audio file and average over channels. Returns the data as a
    numpy array and the sampling rate.

    """
    fh = Sndfile(filename, "r")
    try:
        data = fh.read_frames(fh.nframes)
        rate = fh.samplerate
    finally:
        # Do not leave the file handle open after loading.
        fh.close()
    if data.ndim == 2:
        # Average across the last axis (the channels) to get a single track.
        data = np.mean(data, axis=-1)
    return data, rate
Ejemplo n.º 24
0
def save_record():
    """Write the global `data` buffer to a temporary FLAC file and export it as MP3.

    The temporary file name ends with 'TmpSpeechFile_<caller id>.flac'
    (mkstemp receives it as the suffix). The MP3 is written to
    PATH + FILE_NAME.
    """
    global data
    import os  # local import: only needed to close the mkstemp descriptor

    n_channels, fmt = 1, Format('flac', 'pcm16')
    caller_id = agi.get_variable("CALLERID(num)")
    file_name = 'TmpSpeechFile_' + caller_id + '.flac'
    fd, temp_sound_file = mkstemp(file_name)
    # mkstemp returns an open OS-level descriptor; close it so it does not leak.
    os.close(fd)

    flac_file = Sndfile(temp_sound_file, 'w', fmt, n_channels, SAMPLE_RATE)
    try:
        flac_file.write_frames(data)
    finally:
        # Close before the file is read back so all frames are on disk.
        flac_file.close()

    flac_audio = AudioSegment.from_file(temp_sound_file, "flac")
    flac_audio.export(PATH + FILE_NAME, format="mp3")
Ejemplo n.º 25
0
def load(filename):
    """
    Load a wave file and return the signal, sample rate and number of channels.

    Can be any format that libsndfile supports, like .wav, .flac, etc.
    """
    wave_file = Sndfile(filename, 'r')
    try:
        signal = wave_file.read_frames(wave_file.nframes)
        channels = wave_file.channels
        sample_rate = wave_file.samplerate
    finally:
        # Do not leave the file handle open after loading.
        wave_file.close()
    return signal, sample_rate, channels
Ejemplo n.º 26
0
def save_wav(sound, action_label, object_label):
    wav_path = '/tmp/new_wav'
    filename = os.path.join(wav_path, action_label + '-' + object_label + '-' + str(time.time()) + '.wav')
    format = Format('wav')

    print 'writing', filename, '...',

    f = Sndfile(filename, 'w', format, 1, 44100)
    f.write_frames(sound)
    f.close()
    print 'DONE'
Ejemplo n.º 27
0
def load(filename):
    """Load a wave file and return the signal, sample rate and number of channels.

    Can be any format that libsndfile supports, like .wav, .flac, etc.

    """
    wave_file = Sndfile(filename, 'r')
    try:
        signal = wave_file.read_frames(wave_file.nframes)
        channels = wave_file.channels
        sample_rate = wave_file.samplerate
    finally:
        # Do not leave the file handle open after loading.
        wave_file.close()
    return signal, sample_rate, channels
Ejemplo n.º 28
0
def CQT(filename, fmin=None, n_bins=84, hop_length=512,nfreqs=None):
    """Compute constant-Q features with deltas and energy for an audio file.

    filename   -- audio file to read.
    fmin       -- lowest CQT frequency, passed through to librosa.
    n_bins     -- number of CQT bins.
    hop_length -- hop size used for both the CQT and the energy track.
    nfreqs     -- when given, keep only the first `nfreqs` CQT rows.

    Returns the transpose of the stacked [cqt, delta1, delta2, energy] rows,
    i.e. one row per analysis frame.
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes)
        sr = f.samplerate
    finally:
        # Do not leave the file handle open after loading.
        f.close()
    cqt = librosa.cqt(data, sr=sr, fmin=fmin, n_bins=n_bins, hop_length=hop_length)
    if nfreqs is not None:
        cqt = cqt[:nfreqs,:]
    delta1 = librosa.feature.delta(cqt,order=1)
    delta2 = librosa.feature.delta(cqt,order=2)
    # Use the same hop as the CQT so the frame counts match when stacking;
    # the library default only lines up when hop_length is 512.
    energy = librosa.feature.rmse(y=data, hop_length=hop_length)
    features = np.vstack((cqt,delta1,delta2,energy))
    return features.T
Ejemplo n.º 29
0
def writeAudioOutput(output, fs, f, f2, outputTitle):
  """Write `output` as a mono float64 WAV file and close the given handles.

  output      -- samples to write.
  fs          -- sample rate of the output file.
  f, f2       -- already-open file objects that are closed here as clean-up.
  outputTitle -- path of the file to create.
  """
  try:
    # Define an output audio format
    formt = Format('wav', 'float64')
    outFile = Sndfile(outputTitle, 'w', formt, 1, fs)
    try:
      outFile.write_frames(output)
    finally:
      outFile.close()
  finally:
    # Clean up the caller's handles even when writing fails.
    f.close()
    f2.close()
Ejemplo n.º 30
0
def extractOnsets(audio):
        """Slice `audio` at its detected onsets and save the first slice to disk.

        Onset detection functions ('hfc' and 'complex') are computed on
        1024-sample frames with a hop of 512. The 'hfc' onsets give the slice
        start times; each slice ends at the next onset, and the last one at
        the duration of the audio. The first slice is written to
        'out<counter>.wav' and the global `counter` is incremented.

        Returns the slices produced by the slicer.
        """
        global counter

        od1 = OnsetDetection(method = 'hfc')
        od2 = OnsetDetection(method = 'complex')

        # let's also get the other algorithms we will need, and a pool to store the results
        w = Windowing(type = 'hann')
        fft = FFT() # this gives us a complex FFT
        c2p = CartesianToPolar() # and this turns it into a pair (magnitude, phase)

        pool = essentia.Pool()

        # let's get down to business
        for frame in FrameGenerator(audio, frameSize = 1024, hopSize = 512):
                mag, phase, = c2p(fft(w(frame)))
                pool.add('features.hfc', od1(mag, phase))
                pool.add('features.complex', od2(mag, phase))

        # Phase 2: compute the actual onsets locations
        onsets = Onsets()

        onsets_hfc = onsets(# this algo expects a matrix, not a vector
                array([ pool['features.hfc'] ]),

                # you need to specify weights, but as there is only a single
                # function, it doesn't actually matter which weight you give it
                [ 1 ])

        # Computed as an example only; the result is not used below.
        onsets_complex = onsets(array([ pool['features.complex'] ]), [ 1 ])

        # Each slice runs from one onset to the next; the last slice ends at
        # the end of the audio.
        startTimes = onsets_hfc
        endTimes = onsets_hfc[1:]
        duration = Duration()
        endTimes = np.append(endTimes, duration(audio))

        slicer = Slicer(startTimes = array(startTimes), endTimes = array(endTimes))

        frames = slicer(audio)

        wav_format = Format('wav')
        f = Sndfile('out'+ str(counter) + '.wav' , 'w', wav_format, 1, 44100)
        counter = counter + 1
        try:
                f.write_frames(np.asarray(frames[0]))
        finally:
                # Close the file so the frames are written out and the handle
                # is released.
                f.close()

        return frames
Ejemplo n.º 31
0
def specgram_to_file(path, mags, phasifiers, normalise=True, specgrammode=None):
	"""Resynthesise audio from a spectrogram and write it as a mono WAV file.

	path         -- output file.
	mags         -- magnitudes; treated as log-magnitudes when specgrammode is None.
	phasifiers   -- complex factors multiplied with the magnitudes.
	normalise    -- when true, subtract the minimum magnitude before synthesis
	                and divide the synthesised signal by its maximum.
	specgrammode -- None applies exp() to `mags` first; any other value uses
	                `mags` as given.

	The caller's `mags` array is left unmodified.
	"""
	if specgrammode is None:
		mags = np.exp(mags)
	if normalise:
		# Build a new array: an in-place subtraction would alter the caller's
		# data whenever exp() above was skipped.
		mags = mags - np.min(mags)
	cplx = mags * phasifiers
	pcm = istft(cplx.T)
	if normalise:
		peak = np.max(pcm)
		if peak != 0:
			# Skip scaling when the maximum is zero to avoid dividing by zero.
			pcm = pcm / peak
	outsf = Sndfile(path, "w", Format('wav'), 1, fs)
	try:
		outsf.write_frames(pcm)
	finally:
		# Close the file even when the write fails.
		outsf.close()
Ejemplo n.º 32
0
def wav_to_flac(wav_name):
  """Convert a WAV file to a temporary mono 16-bit FLAC file and return its path.

  The input must have sample rate `RATE`; otherwise AssertionError is raised.
  """
  import os  # local import: only needed to close the mkstemp descriptor

  Signal, fs = wavread(wav_name)[:2]
  # Raise explicitly so the check still runs under -O, keeping the exception
  # type an assert would have produced.
  if fs != RATE:
    raise AssertionError("expected sample rate %s, got %s" % (RATE, fs))

  fd, tmp_name = mkstemp('tmp.flac')
  # mkstemp returns an open OS-level descriptor; close it so it does not leak.
  os.close(fd)

  fmt = Format('flac', 'pcm16')
  nchannels = 1
  flac_file = Sndfile(tmp_name, 'w', fmt, nchannels, RATE)
  try:
    flac_file.write_frames(Signal)
  finally:
    # Close before returning the path so the file is complete for the caller.
    flac_file.close()

  return tmp_name
Ejemplo n.º 33
0
def create_flac_from(sound_samples):
    """Write `sound_samples` to a temporary mono 16-bit FLAC file.

    The file name ends with 'TmpSpeechFile_<caller id>.flac' (mkstemp
    receives it as the suffix) and the sample rate is `constants.RAW_RATE`.

    Returns the path of the temporary file.
    """
    import os  # local import: only needed to close the mkstemp descriptor

    __console.log('prepare flac format for writing to file')
    n_channels, fmt = 1, Format('flac', 'pcm16')
    caller_id = get_stdn_var(stdin.CALLER_ID)
    __console.log('write to temp file')
    fd, temp_sound_file = mkstemp('TmpSpeechFile_' + caller_id + '.flac')
    # mkstemp returns an open OS-level descriptor; close it so it does not leak.
    os.close(fd)
    __console.log('prepare sound file')
    flac_file = Sndfile(temp_sound_file, 'w', fmt, n_channels, constants.RAW_RATE)
    try:
        flac_file.write_frames(np.array(sound_samples))
    finally:
        # Close before returning the path so the file is complete for the caller.
        flac_file.close()
    __console.log('sound file saved')
    return temp_sound_file
Ejemplo n.º 34
0
    def __init__(self,
                 filename,
                 write=False,
                 format='wav',
                 rate=None,
                 channels=None):
        """
        Open audiofile for writing or reading

        Parameters
        ----------
        filename : mixed
            Input wav file. String if a real file, `sys.stdin` for
            standard in.
        write: boolean
            Set true for writing to a file
        format : str
            File format name passed to `Format`. Only used for writing
        rate : int
            Sample rate. Only required for writing
        channels : int
            Number of Channels. Only required for writing

        Raises
        ------
        RuntimeError
            If scikits.audiolab cannot be imported.
        ValueError
            If writing is requested without `rate` and `channels`.

        Notes
        -----

        * The data is assumed to be a numpy array of
          floats, normalized between -1 and 1.

        """
        try:
            from scikits.audiolab import Format, Sndfile
        except ImportError:
            raise RuntimeError('You must have scikits.audiolab installed')

        if filename is sys.stdin:
            filename = '-'

        # Use truthiness consistently: identity checks against True/False let
        # values such as 1 skip validation and still open for writing.
        if write and (rate is None or channels is None):
            raise ValueError('You must provide sampling rate and '
                             'number of channels for writing file.')

        if not write:
            self.f = Sndfile(filename, 'r')

            self.channels = self.f.channels
            self.rate = self.f.samplerate

        else:
            format = Format(format)
            self.f = Sndfile(filename, 'w', format, channels, rate)

            self.channels = channels
            self.rate = rate
Ejemplo n.º 35
0
def analysefile(path, hopsize=0.5, mode='ch', numtop=1, framesize = 1024, chrm_kwargs=None, maxdursecs=None):
	"""Analyses an audio file from disk, dividing into lapped frames and returning an array holding [raw, peaks, slopecent] for each frame.
	Can also do plain FFT-type analysis as an alternative.

	path        -- mono sound file to analyse.
	hopsize     -- hop as a fraction of the analysis frame size.
	mode        -- 'ch' or 'fft'.
	numtop      -- passed through to the per-frame analysis.
	framesize   -- analysis frame size in samples.
	chrm_kwargs -- extra keyword arguments for Chirpletringmod (copied, not modified).
	maxdursecs  -- when given, stop once this many seconds of audio have been read.

	Returns a dict with keys 'ch', 'frames', 'srate', 'hopsize' and 'framesize'.
	Raises ValueError for an unknown mode or a missing file, and `Error` for
	multi-channel input.
	"""
	if (mode != 'ch') and (mode != 'fft'):
		raise ValueError('Mode %s not recognised' % mode)
	if not os.path.isfile(path):
		raise ValueError("path %s not found" % path)
	sf = Sndfile(path, "r")
	try:
		if sf.channels != 1:
			raise Error("ERROR in chirpletringmod: sound file has multiple channels (%i) - mono audio required." % sf.channels)
		srate = sf.samplerate
		nframes = sf.nframes

		if chrm_kwargs is not None:
			chrm_kwargs = deepcopy(chrm_kwargs)
			chrm_kwargs['samplerate'] = srate
			chrm_kwargs['framesize']  = framesize
		else:
			chrm_kwargs = {'samplerate':srate, 'framesize':framesize}

		ch = chirpletringmod.Chirpletringmod(**chrm_kwargs)

		ihop = int(hopsize * ch.framesize)
		numspecframes = nframes // ihop
		print("File contains %i spectral frames" % numspecframes)
		storeraw = numspecframes < 500
		frames = []
		moretocome = True
		data = zeros(ch.framesize, float32)
		splsread = 0  # samples consumed so far, for the maxdursecs limit
		while(moretocome):
			try:
				nextdata = sf.read_frames(ihop, dtype=float32)
			except RuntimeError:
				# Treated as EOF. Feed one hop of silence rather than reusing
				# the previous hop (or an unbound name on the first read).
				moretocome = False
				nextdata = zeros(ihop, float32)
			if len(nextdata) != ihop:
				print("data truncated, detected EOF")
				moretocome = False
				nextdata = hstack((nextdata, zeros(ihop - len(nextdata))))
			# Slide the analysis window forward by one hop.
			data = hstack(( data[ihop:],  nextdata ))

			frames.append(ch.analyseframeplusfeatures(data, hopsize, mode, numtop, storeraw))

			splsread += ihop
			# Compare the amount of audio consumed with the limit; the frame
			# buffer length is constant and says nothing about duration.
			if maxdursecs is not None and splsread >= maxdursecs * srate:
				break
	finally:
		# Close the file on every path, including errors.
		sf.close()
	return {'ch':ch, 'frames':frames, 'srate':srate, 'hopsize':hopsize, 'framesize':ch.framesize}   # the ch knows srate and framesize, why are we duplicating?
Ejemplo n.º 36
0
 def __init__(self,
              fn,
              samplerate,
              filefmt='wav',
              datafmt='pcm16',
              channels=1):
     """Open `fn` for writing with the given file format, encoding, channel
     count and sample rate."""
     init_kwargs = {
         'mode': 'w',
         'format': Format(filefmt, datafmt),
         'channels': channels,
         'samplerate': samplerate,
     }
     Sndfile.__init__(self, fn, **init_kwargs)
Ejemplo n.º 37
0
 def test_bigframes(self):
     """ Try to seek really far."""
     rawname = join(TEST_DATA_DIR, 'test.wav')
     a = Sndfile(rawname, 'r')
     try:
         try:
             a.seek(2 ** 60)
         except IOError:
             # Expected: seeking this far must fail.
             pass
         else:
             raise Exception("Seek really succeded ! This should not happen")
     finally:
         a.close()
Ejemplo n.º 38
0
 def signal(self):
     """Return the audio samples, loading and normalising them on first use.

     Without an offset, `nframes` frames are read. With an offset,
     `nframes_extended` frames are read and only the last `nframes` kept.
     The result is cached in `self._signal`.
     """
     if hasattr(self, '_signal'):
         return self._signal

     unshifted = self.offset_seconds is None
     frame_count = self.nframes if unshifted else self.nframes_extended
     samples = Sndfile(self.path, mode='r').read_frames(
         frame_count, dtype=numpy.dtype(theanoconfig.floatX).type)
     if not unshifted:
         samples = samples[self.nframes_extended - self.nframes:]
     self._signal = samples
     self.normalize()
     return self._signal
Ejemplo n.º 39
0
def plotSpectrogram(f,mode,channel):
    """Plot a spectrogram of one channel of a sound file and save it as PNG.

    f       -- path of the sound file; its last four characters (the
               ".wav" extension) are stripped to build the output name.
    mode    -- not used by this function.
    channel -- column of the sample array to plot.

    On success the image is written to "<name>CH<channel>.png" and
    "./<name>CH<channel>.png" is appended to the module-level ``listN``.
    Errors while reading/plotting or while saving are reported on stdout
    instead of being raised.
    """
    plt.close('all')
    # Base name: the path without its 4-character extension
    name = f[0:len(f) - 4]
    print("Processing: %s CH %s" % (name, channel))
    plt.figure(figsize=(10.5, 3), dpi=100)  # figsize=(13,4)

    fig = None
    try:
        r = Sndfile(f)
        try:
            Fs = r.samplerate
            # Read the first 59.8 seconds; the frame count must be integral.
            sample = r.read_frames(int(59.8 * Fs))
        finally:
            r.close()
        # Spectrogram settings
        NFFT = int(Fs * 0.05)
        noverlap = int(Fs * 0.0025)
        # Colourmap values that work well are: 'binary', 'bone' and 'jet'
        fig = plt.specgram(sample[:, channel], Fs=Fs, NFFT=NFFT,
                           noverlap=noverlap, cmap=plt.get_cmap('jet'))
    except Exception:
        print("Could not process %s" % name)
        plt.figtext(0.5, 0.5, "ERROR")

    plt.title("CH" + str(channel) + " : " + f)
    plt.xticks([], [])  # gets rid of the x ticks and numbers
    plt.yticks([], [])  # gets rid of the y ticks and numbers

    if fig is None:
        # Nothing was plotted, so there is nothing to save.  This used to
        # surface as a swallowed NameError on ``fig``; the report is the same.
        print("ERROR: %s" % name)
        return

    namePNG = name + "CH" + str(channel) + ".png"
    try:
        plt.savefig(namePNG, fig=fig, bbox_inches='tight')  # Save the results
        # Only successfully generated spectrograms are added to the image list
        listN.append("./" + namePNG)
    except Exception:
        print("ERROR: %s" % name)
Ejemplo n.º 40
0
def main(RAW_DATA_DIR, chunk_length, slide_length):
	"""Cut every .wav file in RAW_DATA_DIR into overlapping fixed-length chunks.

	RAW_DATA_DIR -- directory holding the input .wav files; chunks are
	                written to its "parts" sub-directory as p0.wav, p1.wav, ...
	chunk_length -- length of each chunk, in seconds.
	slide_length -- distance between the starts of consecutive chunks, in
	                seconds.

	If "parts" already exists the user is asked whether to rebuild it; any
	answer other than 'y' exits the program.  The last chunk of each file is
	zero-padded to the full chunk length.
	"""
	OUTPUT_DIR = os.path.join(RAW_DATA_DIR, "parts")

	if os.path.isdir(OUTPUT_DIR):
		answer = raw_input("Parts already exist.\nType y to rebuild from scratch : ")
		if answer != 'y':
			print("NOT REBUILT !")
			sys.exit()
		shutil.rmtree(OUTPUT_DIR)
	os.makedirs(OUTPUT_DIR)

	print(RAW_DATA_DIR)
	print(OUTPUT_DIR)

	chunk_counter = 0
	wav_format = Format('wav')

	for wav_file_path in glob.glob(RAW_DATA_DIR + '/*.wav'):
		# wavread returns the samples first and the sample rate second
		wave_info = scikits.audiolab.wavread(wav_file_path)
		data_wave = wave_info[0]
		samplerate = wave_info[1]
		len_wave = len(data_wave)

		# Cut into chunk_length-second chunks, sliding by slide_length
		# seconds, zero padding at the end.
		time_counter = 0
		start_index = 0
		while start_index < len_wave:
			# Indices must be integers even for fractional lengths.
			end_index = int((time_counter + chunk_length) * samplerate)
			if end_index > len_wave:
				padding = np.zeros(end_index - len_wave)
				chunk_wave = np.concatenate((data_wave[start_index:], padding))
			else:
				chunk_wave = data_wave[start_index:end_index]

			time_counter += slide_length
			start_index = int(time_counter * samplerate)

			# Write the chunk and close the file so the data reaches disk.
			outwave_name = OUTPUT_DIR + '/p' + str(chunk_counter) + '.wav'
			f = Sndfile(outwave_name, 'w', wav_format, 1, samplerate)  # 1 stands for the number of channels
			try:
				f.write_frames(chunk_wave)
			finally:
				f.close()

			chunk_counter += 1
	return
Ejemplo n.º 41
0
def readwavefile(inputwav):
	"""Read a mono 44.1 kHz sound file and return its samples as float32.

	Prints a message and exits the process with status 1 when the file has
	a different sample rate or more than one channel.  The file is closed
	on every path.
	"""
	f = Sndfile(inputwav, 'r')
	try:
		if f.samplerate != 44100:
			print('only 44.1kHz filess are supported at present')
			raise SystemExit(1)
		if f.channels != 1:
			print('only 1 channel supported at present')
			raise SystemExit(1)
		return f.read_frames(f.nframes, dtype=np.float32)
	finally:
		f.close()
Ejemplo n.º 42
0
def logmel(filename,n_fft=2048,hop_length=512,nfreqs=None):
    """Compute log-mel, energy and delta features for a sound file.

    filename   -- path of the sound file.
    n_fft      -- FFT size passed to the mel spectrogram.
    hop_length -- hop size, used for both the mel spectrogram and the
                  energy so that their frame counts agree.
    nfreqs     -- when given, only the first ``nfreqs`` mel rows are kept.

    Returns the stacked [log-mel + energy, delta, delta-delta] matrix,
    transposed so that frames are on the first axis.
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes)
        samplerate = f.samplerate
    finally:
        f.close()

    melspectrogram = librosa.feature.melspectrogram(
        y=data, sr=samplerate, n_fft=n_fft, hop_length=hop_length)
    log_spec = librosa.core.logamplitude(melspectrogram)
    if nfreqs is not None:
        log_spec = log_spec[:nfreqs, :]
    # Use the same hop as the spectrogram; with rmse's own default the
    # vstack below breaks as soon as hop_length differs from it.
    energy = librosa.feature.rmse(y=data, hop_length=hop_length)
    spectr = np.vstack((log_spec, energy))
    delta1 = librosa.feature.delta(spectr, order=1)
    delta2 = librosa.feature.delta(spectr, order=2)

    features = np.vstack((spectr, delta1, delta2))
    return features.T
Ejemplo n.º 43
0
   def _createDiff(self, expected, result):
       """Write ``expected - result`` to ``self.diffPath``, chunk by chunk.

       Both inputs are read in chunks of at most 1024 frames.  Writing stops
       after the first chunk in which the two files are not ``allclose``
       (that chunk's difference is still written).  The diff file is created
       with the format, channel count and sample rate of ``result`` and is
       closed before returning.
       """
       diff = Sndfile(self.diffPath, mode='w', format=result.format,
                      channels=result.channels, samplerate=result.samplerate)
       try:
           index = 0
           frameSize = 1024
           while index < result.nframes:
               # The last chunk may be shorter than a full frame.
               frameSize = min(result.nframes - index, frameSize)
               resultFrame = result.read_frames(frameSize)
               expectedFrame = expected.read_frames(frameSize)
               diff.write_frames(expectedFrame - resultFrame)
               if not allclose(resultFrame, expectedFrame):
                   return
               index += frameSize
       finally:
           # Close on every exit path so the written frames reach disk.
           diff.close()
Ejemplo n.º 44
0
def main():
   """Show waveform and spectrogram-slice plots for each file in testSamples/.

   Relies on the module-level ``frameSize`` and ``getSlices``; one plot
   window is shown per file.
   """
   for audioFile in glob.glob("testSamples/*"):
      snd = Sndfile(audioFile, "r")
      try:
         nframes = snd.nframes
         fs = snd.samplerate
         data = snd.read_frames(nframes)
      finally:
         snd.close()
      # Floor division keeps the overlap an integer on Python 2 and 3 alike.
      (frames, freqs, bins, ax) = mp.specgram(data, frameSize,
            noverlap=(frameSize // 2), Fs=fs)
      mp.subplot(211)
      mp.plot(np.linspace(0, float(nframes) / fs, nframes),
            data * 10000 + 10000, alpha=0.4)
      mp.subplot(212)
      mp.plot(bins[0:-1], getSlices(frames, bins, 20))
      mp.show()
Ejemplo n.º 45
0
 def read_audio_file(self, file_name):
     """Read a sound file and return ``(y, x, samplerate)``.

     ``y`` holds the samples as float64 (first channel only when the file
     has several), ``x`` the matching time values in seconds.  Returns None
     when ``file_name`` is the empty string.
     """
     if file_name == '':
         return
     snd = Sndfile(unicode(file_name), 'r')
     wav_data = np.array(snd.read_frames(snd.nframes), dtype=np.float64)
     samplerate = snd.samplerate
     snd.close()
     nsamples = len(wav_data)
     if wav_data.ndim > 1:
         # multi-channel data: keep the first (left) channel only
         wav_data = wav_data[:, 0]
     time_axis = np.arange(nsamples) * 1.0 / samplerate
     return 1.0 * wav_data, time_axis, samplerate
Ejemplo n.º 46
0
def CQT_stacked(filename, fmin=None, n_bins=84, hop_length=512,nfreqs=None):
    """Return the constant-Q transform of a sound file stacked with its deltas.

    filename   -- path of the sound file.
    fmin, n_bins, hop_length -- passed through to ``librosa.cqt``.
    nfreqs     -- when given, only the first ``nfreqs`` rows are kept.

    The CQT and its first- and second-order deltas are each transposed and
    reshaped to (1, L, d) and stacked along the first axis, giving an array
    of shape (3, L, d), where (d, L) is the shape of the CQT matrix.
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes)
        samplerate = f.samplerate
    finally:
        f.close()

    cqt = librosa.cqt(data, sr=samplerate, fmin=fmin, n_bins=n_bins,
                      hop_length=hop_length)
    if nfreqs is not None:
        cqt = cqt[:nfreqs, :]
    delta1 = librosa.feature.delta(cqt, order=1)
    delta2 = librosa.feature.delta(cqt, order=2)
    d, L = cqt.shape
    layers = [m.T.reshape(1, L, d) for m in (cqt, delta1, delta2)]
    return np.vstack(layers)
Ejemplo n.º 47
0
def logmel_stacked(filename,n_fft=2048,hop_length=512,nfreqs=None):
    """Return the log-mel spectrogram of a sound file stacked with its deltas.

    filename   -- path of the sound file.
    n_fft, hop_length -- passed through to the mel spectrogram.
    nfreqs     -- when given, only the first ``nfreqs`` mel rows are kept.

    The log-mel matrix and its first- and second-order deltas are each
    transposed and reshaped to (1, L, d) and stacked along the first axis,
    giving an array of shape (3, L, d), where (d, L) is the shape of the
    log-mel matrix.
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes)
        samplerate = f.samplerate
    finally:
        f.close()

    melspectrogram = librosa.feature.melspectrogram(
        y=data, sr=samplerate, n_fft=n_fft, hop_length=hop_length)
    log_spec = librosa.core.logamplitude(melspectrogram)
    if nfreqs is not None:
        log_spec = log_spec[:nfreqs, :]
    delta1 = librosa.feature.delta(log_spec, order=1)
    delta2 = librosa.feature.delta(log_spec, order=2)
    d, L = log_spec.shape
    layers = [m.T.reshape(1, L, d) for m in (log_spec, delta1, delta2)]
    return np.vstack(layers)
Ejemplo n.º 48
0
 def readAudioFromFile(self):
     """Load all of ``self.filename`` into memory as int16 samples.

     Stores the channel count, frame count, sample rate and the samples
     on the instance, closes the file, and returns True.
     """
     sndfile = Sndfile(self.filename)
     try:
         self.channels = sndfile.channels
         self.sample_count = sndfile.nframes
         self.sample_rate = sndfile.samplerate
         self.samples = sndfile.read_frames(sndfile.nframes,
                                            dtype=numpy.dtype('int16'))
     finally:
         sndfile.close()
     return True
Ejemplo n.º 49
0
def load_dict(filename):
    """
    Load a sound file and return its signal and metadata as a dict.

    The reader is selected by the module-level ``wav_loader`` setting.  The
    dict carries the keys 'signal', 'channels', 'fs', 'samples' and
    'format'; it is empty when ``wav_loader`` names none of the known readers.
    """
    if wav_loader == 'pysoundfile':
        sf = SoundFile(filename)
        soundfile = {
            'signal': sf.read(),
            'channels': sf.channels,
            'fs': sf.samplerate,
            'samples': len(sf),
            'format': sf.format_info + ' ' + sf.subtype_info,
        }
        sf.close()
        return soundfile

    if wav_loader == 'scikits.audiolab':
        sf = Sndfile(filename, 'r')
        soundfile = {
            'signal': sf.read_frames(sf.nframes),
            'channels': sf.channels,
            'fs': sf.samplerate,
            'samples': sf.nframes,
            'format': sf.format,
        }
        sf.close()
        return soundfile

    if wav_loader == 'scipy.io.wavfile':
        rate, signal = read(filename)
        try:
            channels = signal.shape[1]
        except IndexError:
            # a one-dimensional signal has no channel axis: mono
            channels = 1
        return {
            'fs': rate,
            'signal': signal,
            'channels': channels,
            'samples': signal.shape[0],
            'format': str(signal.dtype),
        }

    return {}
Ejemplo n.º 50
0
def bin_calculate(sent_file, bpm, stepsize):
	"""Return ``(bin_size, shunt_window)`` in samples for a sound file.

	sent_file -- path of the sound file; only its sample rate is read.
	bpm       -- tempo in beats per minute.
	stepsize  -- step resolution; the bin lasts 60000 / (bpm * stepsize/4)
	             milliseconds, the shunt window is computed the same way
	             from ``stepsize * 2``.

	``bin_size`` is returned as a float, ``shunt_window`` truncated to int.
	NOTE(review): ``stepsize / 4`` is an integer division on Python 2 when
	stepsize is an int -- confirm whether that is intended.
	"""
	sound_file = Sndfile('{0}'.format(sent_file), 'r')
	try:
		samplerate = sound_file.samplerate
	finally:
		# Only the header is needed; do not leave the file open.
		sound_file.close()
	sec_bin_size = (60000 / float(bpm * (stepsize / 4))) * 0.001
	sixfour_sec_bin_size = (60000 / float(bpm * ((stepsize * 2) / 4))) * 0.001
	bin_size = sec_bin_size * samplerate
	shunt_window = int(sixfour_sec_bin_size * samplerate)
	return bin_size, shunt_window
Ejemplo n.º 51
0
def load(filename):
    """
    Load a sound file and return the signal, sample rate and number of channels.

    The reader is selected by the module-level ``wav_loader`` setting
    ('pysoundfile', 'scikits.audiolab' or 'scipy.io.wavfile').

    Raises ValueError when ``wav_loader`` names none of these readers.
    """
    if wav_loader == 'pysoundfile':
        sf = SoundFile(filename)
        try:
            signal = sf.read()
            channels = sf.channels
            sample_rate = sf.samplerate
        finally:
            sf.close()
    elif wav_loader == 'scikits.audiolab':
        sf = Sndfile(filename, 'r')
        try:
            signal = sf.read_frames(sf.nframes)
            channels = sf.channels
            sample_rate = sf.samplerate
        finally:
            sf.close()
    elif wav_loader == 'scipy.io.wavfile':
        sample_rate, signal = read(filename)
        try:
            channels = signal.shape[1]
        except IndexError:
            # a one-dimensional signal has no channel axis: mono
            channels = 1
    else:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError("unknown wav_loader: %r" % (wav_loader,))

    return signal, sample_rate, channels
Ejemplo n.º 52
0
def process_recording(filename,
                      window_width=.03,
                      window_spacing=.02,
                      num_coeffs=40,
                      mel_encode=True):
    """Compute filter-bank coefficients and a magnitude spectrogram.

    filename       -- path of the sound file to analyse.
    window_width   -- analysis window length, in seconds.
    window_spacing -- distance between window centres, in seconds.
    num_coeffs     -- number of triangular filters in the bank.
    mel_encode     -- when true, the filter edges are passed through
                      ``mel_transform``.

    Returns ``(coeffs, spectragram)``, both transposed so that the window
    index is the last axis: shapes (num_coeffs, num_windows) and
    (len(pos_freqs), num_windows).
    """
    f = Sndfile(filename, 'r')
    try:
        fs = f.samplerate
        data = f.read_frames(f.nframes)
    finally:
        f.close()

    samples = int(fs * window_width)
    num_windows = int((len(data) / (fs * window_spacing))) - 1
    freqs = np.fft.rfftfreq(samples, d=1. / fs)

    # Only the bins from ``idx`` upwards are kept.  Floor division keeps
    # the index an integer on Python 2 and 3 alike.
    if len(freqs) % 2 == 1:
        idx = len(freqs) // 2
    else:
        idx = len(freqs) // 2 - 1
    pos_freqs = freqs[idx:]

    spectragram = np.empty((num_windows, len(pos_freqs)))

    # Half-width on both sides of the centre (the original's ceil() was
    # applied after an integer division, so it never rounded up).
    half = samples // 2
    for i in range(num_windows):
        centre = (i + 1) * fs * window_spacing
        # Slice bounds must be integers; truncate the float positions.
        left = int(centre - half)
        right = int(centre + half)
        window = data[left:right]
        spectragram[i] = np.abs(np.fft.rfft(window)[idx:])

    edges = np.linspace(0, fs / 2., num=(num_coeffs + 2))
    if mel_encode:
        edges = mel_transform(edges)
    # Start from zeros: cells outside a filter's support are never written
    # below and must not hold uninitialised memory.
    filter_bank = np.matrix(np.zeros((num_coeffs, len(pos_freqs))))
    for i in range(num_coeffs):
        for j in range(len(pos_freqs)):
            if edges[i] <= pos_freqs[j] <= edges[i + 2]:
                filter_bank[i, j] = triangle(edges[i], edges[i + 1],
                                             edges[i + 2], pos_freqs[j])

    coeffs = np.empty((num_windows, num_coeffs))
    for i in range(num_windows):
        coeffs[i] = np.transpose(filter_bank *
                                 np.transpose(np.matrix(spectragram[i])))

    return np.transpose(coeffs), np.transpose(spectragram)
def duration(filename):
    """
    Return the duration of a sound file in seconds.

    :param filename: path of the sound file
    :return: number of frames divided by the sample rate, as a float
    """
    f = Sndfile(filename, 'r')
    try:
        return f.nframes / float(f.samplerate)
    finally:
        # Only the header is needed; do not leave the file open.
        f.close()
Ejemplo n.º 54
0
def export_audio_vidId(fn, samplerate, vid, time, cols):
    """Resample the columns of ``cols`` and write them as a sound file.

    fn         -- template file name; ",<vid>" is inserted before its
                  extension to form the output name, and the extension
                  selects the file format.
    samplerate -- output sample rate.
    vid        -- integer id inserted into the output name.
    time       -- time stamps of the rows of ``cols``.
    cols       -- 2-D array, one output channel per column.

    Each column is interpolated over ``time`` and written one second at a
    time, for ``int(time[-1] - time[0])`` whole seconds.  Progress is
    printed to stdout.
    """
    interpolators = [interp1d(time, cols[:, i]) for i in range(cols.shape[1])]
    parts = fn.split('.')
    fmt = Format(type=parts[-1])

    newfn = '.'.join(parts[:-2] + [parts[-2] + ',%d' % vid] + [parts[-1]])

    print('Writing %s' % newfn)
    wav = Sndfile(newfn, 'w', fmt, cols.shape[1], samplerate)
    try:
        # Sample instants within one second.  Divide as floats: an integer
        # division would collapse every offset to zero for an int rate.
        offsets = arange(samplerate) / float(samplerate) + time[0]
        span = float(time[-1] - time[0])
        for i in range(int(span)):
            y = array([g(offsets + i) for g in interpolators]).T
            wav.write_frames(y)
            sys.stdout.write('%0.2f%%     \r' % (i / span * 100))
            sys.stdout.flush()
    finally:
        # Close so that the written frames are flushed to disk.
        wav.close()
    print('100%     ')
Ejemplo n.º 55
0
    def __init__(
            self,
            filename,
            samplerate=44100,
            channels=1,
            format=Format('wav', 'float32'),
    ):
        """Open ``filename`` for writing and record its parameters.

        The parameters are kept in ``self._info`` together with a frame
        counter starting at 0; the open file is kept in ``self._sndfile``.
        Raises NameError when the opened file object is falsy.
        """
        # TODO: metadata not implemented
        self._info = dict(
            filename=filename,
            samplerate=samplerate,
            channels=channels,
            format=format,
            frames=0,
        )

        handle = Sndfile(filename, 'w', format, channels, samplerate)
        self._sndfile = handle
        if not handle:
            raise NameError('Sndfile error loading file %s' % filename)
Ejemplo n.º 56
0
def load_wav(fname, rate=None):
    """Load a sound file as a (frames, channels) array, optionally resampled.

    fname -- path of the sound file.
    rate  -- target sample rate; when None or equal to the file's own rate
             the signal is returned unchanged.

    Returns ``(signal, rate)`` where ``rate`` is the sample rate of the
    returned signal.
    """
    fp = Sndfile(fname, 'r')
    try:
        _rate = fp.samplerate
        # The reshape forces two dimensions, so mono files come out as
        # (frames, 1) and no separate 1-D case is needed.
        _signal = fp.read_frames(fp.nframes).reshape((-1, fp.channels))
    finally:
        fp.close()

    if rate is None or rate == _rate:
        return _signal, _rate

    signal = resampy.resample(_signal,
                              _rate,
                              rate,
                              axis=0,
                              filter='kaiser_best')
    return signal, rate