Exemple #1
0
    def _test_write(self, func, format, filext):
        """Check a matapi ``*write`` function against a direct Sndfile write.

        The same noise buffer is written twice, once through ``Sndfile`` and
        once through ``func``.  Both files are then read back and must hold
        identical frames, format, sample rate and channel count.

        :param func: writer under test, called as ``func(data, filename, fs)``.
        :param format: format object passed to ``Sndfile`` for the reference
            file.
        :param filext: extension (without the dot) of both temporary files.
        """
        def _snapshot(path):
            # Collect everything the comparison needs and release the handle
            # even when reading fails, so the temp file can be removed.
            handle = Sndfile(path)
            try:
                return (handle.read_frames(handle.nframes), handle.format,
                        handle.samplerate, handle.channels)
            finally:
                handle.close()

        rfd1, fd1, cfilename1 = open_tmp_file('matapi_test.' + filext)
        rfd2, fd2, cfilename2 = open_tmp_file('matapi_test.' + filext)
        try:
            nbuff = 22050
            fs = nbuff
            noise = 0.1 * N.random.randn(nbuff)

            # Reference file, written directly with Sndfile
            b = Sndfile(cfilename1, 'w', format, 1, fs)
            try:
                b.write_frames(noise)
            finally:
                b.close()

            # Same data, written by the function under test
            func(noise, cfilename2, fs)

            # Both files must carry the same audio data and meta-data
            frames1, format1, rate1, channels1 = _snapshot(cfilename1)
            frames2, format2, rate2, channels2 = _snapshot(cfilename2)

            assert_array_equal(frames1, frames2)
            assert_equal(format1, format2)
            assert_equal(rate1, rate2)
            assert_equal(channels1, channels2)
        finally:
            close_tmp_file(rfd1, cfilename1)
            close_tmp_file(rfd2, cfilename2)
Exemple #2
0
    def test_basic_io(self):
        """Check open, close and basic read/write by copying the test file.

        The reference wav is read in blocks of ``nbuff`` frames and written to
        a temporary pcm16 wav; every block read must come back as float64.
        """
        # dirty !
        ofilename = join(TEST_DATA_DIR, 'test.wav')
        rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
        try:
            nbuff = 22050

            # Open the test file for reading
            a = Sndfile(ofilename, 'r')
            try:
                nframes = a.nframes

                # Open the copy file for writing
                format = Format('wav', 'pcm16')
                b = Sndfile(fd, 'w', format, a.channels, a.samplerate)
                try:
                    # Copy the data.  Floor division keeps the block count an
                    # int (range() rejects floats under true division).
                    for _ in range(nframes // nbuff):
                        tmpa = a.read_frames(nbuff)
                        # np.float was only an alias of the builtin float,
                        # i.e. float64; the alias no longer exists in NumPy.
                        assert tmpa.dtype == np.float64
                        b.write_frames(tmpa)
                    nrem = nframes % nbuff
                    tmpa = a.read_frames(nrem)
                    assert tmpa.dtype == np.float64
                    b.write_frames(tmpa)
                finally:
                    b.close()
            finally:
                a.close()
        finally:
            close_tmp_file(rfd, cfilename)
    def test_basic_io(self):
        """Check open, close and basic read/write.

        Copies ``test.wav`` block by block into a temporary pcm16 wav file and
        checks that each block is delivered as float64.
        """
        # dirty !
        ofilename = join(TEST_DATA_DIR, 'test.wav')
        rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
        a = b = None
        try:
            nbuff = 22050

            # Open the test file for reading
            a = Sndfile(ofilename, 'r')
            nframes = a.nframes

            # Open the copy file for writing
            format = Format('wav', 'pcm16')
            b = Sndfile(fd, 'w', format, a.channels, a.samplerate)

            # divmod gives an integer block count plus the remainder, so the
            # loop also works when "/" is true division.
            nfull, nrem = divmod(nframes, nbuff)

            # Copy the full blocks, then the remainder
            for _ in range(nfull):
                tmpa = a.read_frames(nbuff)
                # float64 is what the removed np.float alias resolved to
                assert tmpa.dtype == np.float64
                b.write_frames(tmpa)
            tmpa = a.read_frames(nrem)
            assert tmpa.dtype == np.float64
            b.write_frames(tmpa)
        finally:
            # Release both handles before the temp file is removed, even when
            # an assertion above failed.
            try:
                for handle in (a, b):
                    if handle is not None:
                        handle.close()
            finally:
                close_tmp_file(rfd, cfilename)
    def _test_write(self, func, format, filext):
        """Check a matapi ``*write`` function against a plain Sndfile write.

        :param func: writer under test, called as ``func(data, filename, fs)``.
        :param format: format object used for the reference ``Sndfile``.
        :param filext: file extension (no dot) for the two temporary files.
        """
        rfd1, fd1, cfilename1 = open_tmp_file('matapi_test.' + filext)
        rfd2, fd2, cfilename2 = open_tmp_file('matapi_test.' + filext)
        try:
            nbuff = 22050
            fs = nbuff
            noise = 0.1 * N.random.randn(nbuff)

            # Open the first file for writing with Sndfile
            b = Sndfile(cfilename1, 'w', format, 1, fs)
            try:
                b.write_frames(noise)
            finally:
                b.close()

            # Write same data with the function under test
            func(noise, cfilename2, fs)

            # Compare if both files have both same audio data and same
            # meta-data.  The nested try/finally blocks guarantee that both
            # handles are closed when an assertion fails.
            f1 = Sndfile(cfilename1)
            try:
                f2 = Sndfile(cfilename2)
                try:
                    assert_array_equal(f1.read_frames(f1.nframes),
                                       f2.read_frames(f2.nframes))
                    assert_equal(f1.format, f2.format)
                    assert_equal(f1.samplerate, f2.samplerate)
                    assert_equal(f1.channels, f2.channels)
                finally:
                    f2.close()
            finally:
                f1.close()
        finally:
            close_tmp_file(rfd1, cfilename1)
            close_tmp_file(rfd2, cfilename2)
Exemple #5
0
def load_soundfile(inwavpath, startpossecs, maxdursecs=None):
    """Load audio data, optionally limited to a start position and duration.

    The file must be SINGLE-CHANNEL and its sample rate must equal
    ``srate * wavdownsample``.  Audio is read in blocks of 4096 frames and
    decimated by keeping every ``wavdownsample``-th sample.

    :param inwavpath: path of the sound file to read.
    :param startpossecs: start offset in seconds; values > 0 trigger a seek.
    :param maxdursecs: maximum duration in seconds (defaults to 9999).
    :returns: 1-D ``float32`` array of the decimated samples.
    :raises ValueError: on a block-size/decimation mismatch, a non-mono file
        or an unexpected sample rate.
    """
    framelen = 4096
    hopspls = framelen
    unhopspls = framelen - hopspls
    if (framelen % wavdownsample) != 0:
        raise ValueError("framelen needs to be a multiple of wavdownsample: %i, %i" % (
            framelen, wavdownsample))
    if (hopspls % wavdownsample) != 0:
        raise ValueError("hopspls  needs to be a multiple of wavdownsample: %i, %i" % (
            hopspls, wavdownsample))
    if maxdursecs is None:
        maxdursecs = 9999
    # Exact because divisibility was checked above
    expected_len = framelen // wavdownsample

    sf = Sndfile(inwavpath, "r")
    try:
        if sf.channels != 1:
            raise ValueError(
                "Sound file %s has multiple channels (%i) - mono required." % (inwavpath, sf.channels))
        timemax_spls = int(maxdursecs * sf.samplerate)
        if sf.samplerate != (srate * wavdownsample):
            # Report the rate that is actually required of the file
            raise ValueError(
                "Sample rate mismatch: we expect %g, file has %g" % (
                    srate * wavdownsample, sf.samplerate))
        if startpossecs > 0:
            # note: returns IOError if beyond the end
            # The frame offset must be an integer even for fractional seconds
            sf.seek(int(startpossecs * sf.samplerate))

        splsread = 0
        framesread = 0
        chunk = None
        # Collect blocks and join them once, instead of re-copying the whole
        # signal with hstack on every iteration.
        chunks = []
        while True:
            try:
                if splsread == 0:
                    chunk = sf.read_frames(framelen)[::wavdownsample]
                    splsread += framelen
                else:
                    chunk = np.hstack(
                        (chunk[:unhopspls], sf.read_frames(hopspls)[::wavdownsample]))
                    splsread += hopspls
                framesread += 1
                if framesread % 25000 == 0:
                    print("Read %i frames" % framesread)
                if len(chunk) != expected_len:
                    print("Not read sufficient samples - returning")
                    break
                chunk = np.array(chunk, dtype=np.float32)
                chunks.append(chunk)
                if splsread >= timemax_spls:
                    break
            except RuntimeError:
                # Treated as end of data, as before
                break
    finally:
        # Close the handle on every exit path, including the ValueErrors
        sf.close()

    if not chunks:
        return np.array([], dtype=np.float32)
    return np.concatenate(chunks)
    def test_simple(self):
        """Check seek() with the default whence, whence == 1 and whence == 2.

        Each scenario re-reads a region after seeking back to it and expects
        the same frames.  Every handle is closed in a ``finally`` clause so
        that a failing assertion does not leak it.
        """
        ofilename = join(TEST_DATA_DIR, 'test.wav')

        # First, read some frames, go back, and compare buffers
        a = Sndfile(ofilename, 'r')
        try:
            nframes = a.nframes
            buffsize = min(nframes, 1024)

            buff = a.read_frames(buffsize)
            a.seek(0)
            buff2 = a.read_frames(buffsize)
            assert_array_equal(buff, buff2)
        finally:
            a.close()

        # Now, read some frames, go back, and compare buffers
        # (check whence == 1 == SEEK_CUR)
        a = Sndfile(ofilename, 'r')
        try:
            a.read_frames(buffsize)
            buff = a.read_frames(buffsize)
            a.seek(-buffsize, 1)
            buff2 = a.read_frames(buffsize)
            assert_array_equal(buff, buff2)
        finally:
            a.close()

        # Now, read some frames, go back, and compare buffers
        # (check whence == 2 == SEEK_END)
        a = Sndfile(ofilename, 'r')
        try:
            buff = a.read_frames(nframes)
            a.seek(-buffsize, 2)
            buff2 = a.read_frames(buffsize)
            assert_array_equal(buff[-buffsize:], buff2)
        finally:
            # The original never closed this last handle
            a.close()
Exemple #7
0
    def test_simple(self):
        """Check that seeking back and re-reading yields the same frames.

        Covers the default whence, whence == 1 (SEEK_CUR) and whence == 2
        (SEEK_END).
        """
        def _check(scenario):
            # Run one scenario on a fresh handle and always release it
            handle = Sndfile(ofilename, 'r')
            try:
                scenario(handle)
            finally:
                handle.close()

        def _from_start(a):
            # First, read some frames, go back, and compare buffers
            buff = a.read_frames(buffsize)
            a.seek(0)
            buff2 = a.read_frames(buffsize)
            assert_array_equal(buff, buff2)

        def _from_current(a):
            # Read some frames, go back relative to the current position
            # (check whence == 1 == SEEK_CUR)
            a.read_frames(buffsize)
            buff = a.read_frames(buffsize)
            a.seek(-buffsize, 1)
            buff2 = a.read_frames(buffsize)
            assert_array_equal(buff, buff2)

        def _from_end(a):
            # Read everything, go back relative to the end
            # (check whence == 2 == SEEK_END)
            buff = a.read_frames(nframes)
            a.seek(-buffsize, 2)
            buff2 = a.read_frames(buffsize)
            assert_array_equal(buff[-buffsize:], buff2)

        ofilename = join(TEST_DATA_DIR, 'test.wav')

        # Open the test file once just to size the buffers
        probe = Sndfile(ofilename, 'r')
        try:
            nframes = probe.nframes
        finally:
            probe.close()
        buffsize = min(nframes, 1024)

        _check(_from_start)
        _check(_from_current)
        _check(_from_end)
def read_sound(fp):
    """
    Create a normalized float array and sample rate from an audio file.

    The loader is chosen from the file name suffix:

    * ``mp3``: decoded to ``temp.wav`` with the ``lame`` command line tool;
      if that fails, moviepy is tried instead.
    * ``aif``: read through ``Sndfile``.
    * ``wav``: read through ``wav.read``.

    Only the first channel of multi-channel data is kept.  The samples are
    converted to float64 and divided by their maximum value.

    :param fp: path of the audio file.
    :returns: ``(data, samplerate)``.
    :raises ValueError: when no loader produced any data (unsupported
        extension, or every mp3 decoder failed).
    """
    data = None
    samplerate = None
    if fp.endswith('mp3'):
        try:
            oname = 'temp.wav'
            result = subprocess.call(['lame', '--decode', fp, oname])
            # Explicit check instead of ``assert(result is 0)``: asserts are
            # stripped under -O and ``is`` compares identity, not value.
            if result != 0:
                raise RuntimeError("lame exited with status %r" % (result,))
            samplerate, data = wav.read(oname)
        except Exception:
            print("couldn't run lame")
            try:
                import moviepy.editor as mpy
                aud_clip = mpy.AudioFileClip(fp)
                samplerate = aud_clip.fps
                data = aud_clip.to_soundarray()
            except Exception:
                print("moviepy not installed?")
    if fp.endswith('aif'):
        sf = Sndfile(fp, 'r')
        try:
            sf.seek(0)
            data = sf.read_frames(sf.nframes)
            samplerate = sf.samplerate
        finally:
            sf.close()
    if fp.endswith('wav'):
        samplerate, data = wav.read(fp)

    if data is None:
        # Previously this surfaced as an UnboundLocalError a few lines below
        raise ValueError("could not load audio from %r" % (fp,))

    if len(data.shape) > 1:
        data = data[:, 0]
    data = data.astype('float64')
    peak = data.max()
    # An all-zero signal would otherwise turn into NaNs
    if peak != 0:
        data /= peak
    return data, samplerate
    def test_bad_wavread(self):
        """Check that wavread refuses a file that is not in wav format.

        An aiff/pcm16 file is written under a ``.wav`` name; wavread is
        expected to raise ValueError on it.
        """
        # Create a tmp audio file with non wav format, write some random data into it,
        # and check it can not be opened by wavread
        rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
        try:
            nbuff = 22050
            noise = 0.1 * N.random.randn(nbuff)

            # Open the copy file for writing
            format = audio_format('aiff', 'pcm16')
            b = Sndfile(cfilename, 'w', format, 1, nbuff)
            try:
                b.write_frames(noise)
            finally:
                b.close()

            # The file itself must still be readable through Sndfile
            b = Sndfile(cfilename, 'r')
            try:
                b.read_frames(nbuff)
            finally:
                b.close()

            try:
                wavread(cfilename)
            except ValueError:
                # Expected: wavread rejected the non-wav file
                pass
            else:
                raise Exception("wavread on non wav file succeded, expected to fail")

        finally:
            close_tmp_file(rfd, cfilename)
def load_pcm(path):
    """Read a whole sound file and return its first channel.

    :param path: path of the sound file.
    :returns: ``(pcm, samplerate)``; for files that are not single-channel
        ``pcm`` is column 0 of the frames that were read.
    """
    wave = Sndfile(path, "r")
    try:
        pcm = wave.read_frames(wave.nframes)
        channels = wave.channels
        samplerate = wave.samplerate
    finally:
        wave.close()
    # Value comparison: ``is not 1`` tested object identity, which only
    # happens to work for small ints on CPython.
    if channels != 1:
        pcm = pcm[:, 0]
    return (pcm, samplerate)
Exemple #11
0
def file_to_specgram(path, specgrammode=None):
	"""Compute a log-magnitude spectrogram and unit phase factors for a mono file.

	Only the default mode (``specgrammode=None``) is implemented: the whole
	file is read, passed through ``stft`` and cropped to ``specfreqbinrange``.

	:param path: path of the sound file.
	:param specgrammode: must be None; any other value is rejected.
	:returns: ``(mags, phasifiers)`` where ``mags`` is ``log(abs(spec))`` and
		``phasifiers`` is ``spec / abs(spec)``.
	:raises ValueError: unknown mode, ``fftsize != framelen`` or missing file.
	"""
	if specgrammode is not None:
		raise ValueError("specgrammode not recognised: %s" % specgrammode)

	# default is to do a "normal" spectrogram right here
	if fftsize != framelen: raise ValueError("this mode requires normal fftsize")
	if not os.path.isfile(path):
		raise ValueError("path %s not found" % path)
	sf = Sndfile(path, "r")
	try:
		# NOTE(review): ``Error`` is not defined in this snippet - confirm it
		# exists at module level, otherwise these raise NameError.
		if sf.channels != 1:
			raise Error("ERROR in spemptk: sound file has multiple channels (%i) - mono audio required." % sf.channels)
		if sf.samplerate != fs:
			raise Error("ERROR in spemptk: wanted srate %g - got %g." % (fs, sf.samplerate))
		chunksize = 4096
		# Seeded with an empty float64 array so the joined result keeps the
		# dtype the incremental hstack used to produce.
		chunks = [np.array([])]
		while True:
			try:
				chunks.append(sf.read_frames(chunksize, dtype=np.float32))
			except RuntimeError:
				# Treated as end of file
				break
	finally:
		sf.close()
	# One join instead of re-copying the signal on every chunk
	pcm = np.hstack(chunks)
	spec = stft(pcm).T

	spec = spec[specfreqbinrange[0]:specfreqbinrange[1],:]
	mags = abs(spec)
	phasifiers = spec / mags
	mags = np.log(mags)
	return (mags, phasifiers)
Exemple #12
0
def file_to_specgram(path, specgrammode=None):
    """Return ``(mags, phasifiers)`` for the spectrogram of a mono sound file.

    In the default mode the file is read completely, transformed with
    ``stft``, cropped to ``specfreqbinrange`` and the magnitudes are
    log-scaled.  ``phasifiers`` is the spectrogram divided by its magnitude.

    :param path: path of the sound file.
    :param specgrammode: only None (the default mode) is recognised.
    :raises ValueError: unknown mode, ``fftsize != framelen`` or missing file.
    """
    if specgrammode is None:  # default is to do a "normal" spectrogram right here
        if fftsize != framelen:
            raise ValueError("this mode requires normal fftsize")
        if not os.path.isfile(path):
            raise ValueError("path %s not found" % path)
        sf = Sndfile(path, "r")
        try:
            # NOTE(review): ``Error`` must be provided by the enclosing
            # module; it is not defined in this snippet - confirm.
            if sf.channels != 1:
                raise Error(
                    "ERROR in spemptk: sound file has multiple channels (%i) - mono audio required."
                    % sf.channels)
            if sf.samplerate != fs:
                raise Error("ERROR in spemptk: wanted srate %g - got %g." %
                            (fs, sf.samplerate))
            chunksize = 4096
            # Start from an empty float64 array: joining it with the float32
            # chunks yields the same dtype as the original running hstack.
            pieces = [np.array([])]
            while True:
                try:
                    pieces.append(sf.read_frames(chunksize, dtype=np.float32))
                except RuntimeError:
                    break
        finally:
            # Release the handle on success and on every error path
            sf.close()
        pcm = np.hstack(pieces)
        spec = stft(pcm).T
    else:
        raise ValueError("specgrammode not recognised: %s" % specgrammode)
    spec = spec[specfreqbinrange[0]:specfreqbinrange[1], :]
    mags = abs(spec)
    phasifiers = spec / mags
    if specgrammode is None:
        mags = np.log(mags)
    return (mags, phasifiers)
Exemple #13
0
    def _test_int_io(self, dt):
        """Round-trip random integers of dtype ``dt`` through a wav file.

        :param dt: integer dtype; it selects the wav encoding through
            ``_DTYPE_TO_ENC`` and is also the dtype requested when reading.
        """
        # TODO: check if neg or pos value is the highest in abs
        rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
        try:
            # Use almost full possible range possible for the given data-type
            nb = 2**(8 * np.dtype(dt).itemsize - 3)
            fs = 22050
            nbuff = fs
            # randint excludes its upper bound, so nb + 1 reproduces the
            # inclusive range of the removed random_integers(-nb, nb).
            a = np.random.randint(-nb, nb + 1, nbuff)
            a = a.astype(dt)

            # Open the file for writing
            format = Format('wav', _DTYPE_TO_ENC[dt])
            b = Sndfile(fd, 'w', format, 1, fs)
            try:
                b.write_frames(a)
            finally:
                b.close()

            b = Sndfile(cfilename, 'r')
            try:
                read_a = b.read_frames(nbuff, dtype=dt)
            finally:
                b.close()

            assert_array_equal(a, read_a)

        finally:
            close_tmp_file(rfd, cfilename)
Exemple #14
0
	def file_to_features(self, wavpath):
		"""Read a WAV file frame by frame and convert each frame to MFCC features.

		Multi-channel frames are averaged down to one channel.  Each frame is
		Hamming-windowed and transformed with an FFT; its lower half spectrum is
		passed to ``self.mfccMaker``.  Coefficients 1..13 are kept per frame.

		:param wavpath: path of the sound file; its sample rate must equal ``fs``.
		:returns: array built from the per-frame feature vectors.
		:raises ValueError: missing file or unexpected sample rate.
		"""
		if verbose: print("Reading %s" % wavpath)
		if not os.path.isfile(wavpath): raise ValueError("path %s not found" % wavpath)
		sf = Sndfile(wavpath, "r")
		try:
			#if (sf.channels != 1) and verbose: print(" Sound file has multiple channels (%i) - channels will be mixed to mono." % sf.channels)
			if sf.samplerate != fs:         raise ValueError("wanted sample rate %g - got %g." % (fs, sf.samplerate))
			window = np.hamming(framelen)
			# Integer slice bound; framelen/2 is a float under true division
			halfframe = framelen // 2
			features = []
			while(True):
				try:
					chunk = sf.read_frames(framelen, dtype=np.float32)
					if len(chunk) != framelen:
						print("Not read sufficient samples - returning")
						break
					if sf.channels != 1:
						chunk = np.mean(chunk, 1) # mixdown
					framespectrum = np.fft.fft(window * chunk)
					magspec = abs(framespectrum[:halfframe])

					# do the frequency warping and MFCC computation
					melSpectrum = self.mfccMaker.warpSpectrum(magspec)
					melCepstrum = self.mfccMaker.getMFCCs(melSpectrum,cn=True)
					melCepstrum = melCepstrum[1:]   # exclude zeroth coefficient
					melCepstrum = melCepstrum[:13] # limit to lower MFCCs

					framefeatures = melCepstrum   # todo: include deltas? that can be your homework.

					features.append(framefeatures)
				except RuntimeError:
					# Treated as end of file
					break
		finally:
			# Also releases the handle when the sample-rate check raises
			sf.close()
		return np.array(features)
Exemple #15
0
    def test_bad_wavread(self):
        """Check wavread on bad file.

        Writes aiff/pcm16 data into a file named ``*.wav`` and expects
        wavread to reject it with ValueError.
        """
        # Create a tmp audio file with non wav format, write some random data into it,
        # and check it can not be opened by wavread
        rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
        try:
            nbuff = 22050
            noise = 0.1 * N.random.randn(nbuff)

            # Open the copy file for writing
            format = audio_format('aiff', 'pcm16')
            b = Sndfile(cfilename, 'w', format, 1, nbuff)
            try:
                b.write_frames(noise)
            finally:
                b.close()

            # Sanity check: Sndfile itself can read the data back
            b = Sndfile(cfilename, 'r')
            try:
                b.read_frames(nbuff)
            finally:
                b.close()

            rejected = False
            try:
                wavread(cfilename)
            except ValueError:
                # ``except ValueError, e`` is Python 2 only syntax and the
                # bound exception was never used.
                rejected = True
            if not rejected:
                raise Exception(
                    "wavread on non wav file succeded, expected to fail")

        finally:
            close_tmp_file(rfd, cfilename)
    def _test_int_io(self, dt):
        """Write random ``dt`` integers to a wav file and read them back unchanged.

        :param dt: integer dtype under test; ``_DTYPE_TO_ENC[dt]`` gives the
            encoding of the temporary wav file.
        """
        # TODO: check if neg or pos value is the highest in abs
        rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
        b = None
        try:
            # Use almost full possible range possible for the given data-type
            nb = 2 ** (8 * np.dtype(dt).itemsize - 3)
            fs = 22050
            nbuff = fs
            # random_integers(-nb, nb) was inclusive on both ends and is no
            # longer available; randint needs high + 1 for the same range.
            a = np.random.randint(-nb, nb + 1, nbuff).astype(dt)

            # Open the file for writing
            format = Format('wav', _DTYPE_TO_ENC[dt])
            b = Sndfile(fd, 'w', format, 1, fs)
            b.write_frames(a)
            b.close()
            b = None

            b = Sndfile(cfilename, 'r')
            read_a = b.read_frames(nbuff, dtype=dt)
            b.close()
            b = None

            assert_array_equal(a, read_a)

        finally:
            # Close a handle left open by a failure before removing the file
            try:
                if b is not None:
                    b.close()
            finally:
                close_tmp_file(rfd, cfilename)
Exemple #17
0
	def file_to_features(self, wavpath):
		"""Convert every full frame of a WAV file to a 13-coefficient MFCC vector.

		Frames of ``framelen`` samples are read until the file is exhausted.
		Frames with more than one channel are averaged across channels first.

		:param wavpath: path of the sound file (sample rate must be ``fs``).
		:returns: ``np.array`` of the collected per-frame features.
		:raises ValueError: when the path is missing or the sample rate differs.
		"""
		if verbose: print("Reading %s" % wavpath)
		if not os.path.isfile(wavpath): raise ValueError("path %s not found" % wavpath)
		sf = Sndfile(wavpath, "r")
		features = []
		try:
			#if (sf.channels != 1) and verbose: print(" Sound file has multiple channels (%i) - channels will be mixed to mono." % sf.channels)
			if sf.samplerate != fs:         raise ValueError("wanted sample rate %g - got %g." % (fs, sf.samplerate))
			window = np.hamming(framelen)
			multichannel = sf.channels != 1
			while(True):
				try:
					chunk = sf.read_frames(framelen, dtype=np.float32)
					if len(chunk) != framelen:
						print("Not read sufficient samples - returning")
						break
					if multichannel:
						chunk = np.mean(chunk, 1) # mixdown
					framespectrum = np.fft.fft(window * chunk)
					# "//" keeps the slice bound an int under true division
					magspec = abs(framespectrum[:framelen // 2])

					# do the frequency warping and MFCC computation
					melSpectrum = self.mfccMaker.warpSpectrum(magspec)
					melCepstrum = self.mfccMaker.getMFCCs(melSpectrum,cn=True)
					# drop the zeroth coefficient, then keep the next 13
					framefeatures = melCepstrum[1:][:13]   # todo: include deltas? that can be your homework.

					features.append(framefeatures)
				except RuntimeError:
					break
		finally:
			# Always release the file, including on the ValueError above
			sf.close()
		return np.array(features)
Exemple #18
0
def timeStretchAudio(inputAudio, outputAudio, outputDuration, writeOutput=1):
	"""Time-stretch a recording with sinusoidal analysis / resynthesis.

	The first channel of ``inputAudio`` is analysed with ``SM.sineModelAnal``,
	the sinusoidal tracks are time-scaled with ``trans.sineTimeScaling`` and
	resynthesised with ``SM.sineModelSynth``.

	:param inputAudio: path of the sound file to read.
	:param outputAudio: path of the file to write when ``writeOutput == 1``.
	:param outputDuration: last entry of the time-scaling map
		``[0, 0, .4, outputDuration]``.
	:param writeOutput: 1 writes the result to ``outputAudio`` and returns
		None; any other value returns the result instead.
	:returns: None, or ``(y, fs, nChannel)`` when nothing is written.
	"""
	originalWav = Sndfile(inputAudio, 'r')
	try:
		x = originalWav.read_frames(originalWav.nframes)
		fs = originalWav.samplerate
		nChannel = originalWav.channels
		fileFormat = originalWav.format
	finally:
		originalWav.close()
	print(fs)
	if nChannel > 1:
		# Other loaders in this file index frames as (frame, channel), so the
		# first channel is column 0; x[0] would be the first frame only.
		x = x[:, 0]

	w = np.hamming(801)
	N = 2048
	t = -90
	minSineDur = .005
	maxnSines = 150
	freqDevOffset = 20
	freqDevSlope = 0.02
	Ns = 512
	# Integer hop size also under true division
	H = Ns // 4
	tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
	#timeScale = np.array([0.1,0.1, inputDur, inputDur*2])
	timeScale = np.array([0,0, .4,outputDuration])

	ytfreq, ytmag = trans.sineTimeScaling(tfreq, tmag, timeScale)
	y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)

	if writeOutput ==1:
		# NOTE(review): y is synthesised from a single channel, so the output
		# is declared mono - confirm sineModelSynth returns a 1-D signal.
		outputWav = Sndfile(outputAudio, 'w', fileFormat, 1, fs)
		try:
			outputWav.write_frames(y)
		finally:
			outputWav.close()
	else:
		return y, fs, nChannel
Exemple #19
0
def downsample(fs, sig):
    """Resample ``sig`` to ``SAMPLE_RATE`` by round-tripping through sox.

    The signal is zero-padded to a multiple of ``fs * WINDOW_SIZE`` samples,
    written to a temporary pcm16 wav file, passed through the sox ``rate``
    effect and read back.

    :param fs: sample rate of ``sig``.
    :param sig: 1-D signal to resample.
    :returns: the frames read from the resampled file.
    """
    in_file = random_string() + ".wav"
    out_file = random_string() + ".wav"

    frame_len = fs * WINDOW_SIZE
    pad = len(sig)%frame_len
    if pad > 0:
        sig = np.append(sig, np.zeros(frame_len - pad))

    try:
        f = Sndfile(in_file, 'w', Format(type="wav", encoding='pcm16', endianness="file"), 1, fs)
        try:
            f.write_frames(sig)
        finally:
            f.close()

        sox_in = pysox.CSoxStream(in_file)
        sox_out = pysox.CSoxStream(out_file, 'w', pysox.CSignalInfo(SAMPLE_RATE, 1, 8), fileType='wav')
        try:
            sox_chain = pysox.CEffectsChain(sox_in, sox_out)
            sox_chain.add_effect(pysox.CEffect("rate", [str(SAMPLE_RATE)]))
            sox_chain.flow_effects()
        finally:
            sox_out.close()

        f = Sndfile(out_file, 'r')
        try:
            sig = f.read_frames(f.nframes)
        finally:
            f.close()
    finally:
        # Remove whichever temporary files exist, also after a failure
        for tmp_path in (in_file, out_file):
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)

    return sig
Exemple #20
0
def get_fft_points(sound_filename, fps, fft_pixels, rate = 1, fourierwidth = 0.3):
	"""TODO
	will generate rate points per frame
	Based on the script from
	http://classicalconvert.com/2008/04/
	how-to-visualize-music-using-animated-spectrograms-with
	-open-source-everything/

	The file is read in blocks of ``samplerate // (rate * fps)`` frames.
	Multi-channel frames are averaged, each block is passed to ``fft`` and
	the real parts of its first ``fft_pixels`` bins form one point
	(zero-filled when the block is shorter).

	:param sound_filename: path of the sound file.
	:param fps: video frames per second.
	:param fft_pixels: number of spectrum values kept per point.
	:param rate: points generated per video frame.
	:param fourierwidth: unused by this function.
	:returns: list of spectra, each a list of ``fft_pixels`` floats.
	:raises ValueError: when the block size would be smaller than one frame.
	"""
	f = Sndfile(sound_filename, 'r')
	try:
		nframes = f.nframes
		# read_frames needs an integer count
		divisor = int(f.samplerate // (rate * fps)) # should be integer
		if divisor < 1:
			# A zero-length block would never advance framepos below
			raise ValueError(
				"rate * fps (%s) exceeds the sample rate (%s)"
				% (rate * fps, f.samplerate))
		points = []
		framepos = 0
		while framepos < nframes:
			read_len = (
				divisor if (framepos + divisor < nframes)
				else nframes - framepos)
			frames = f.read_frames(read_len)
			buff = []
			for frame in frames:
				# is frame iterable or just one chan?
				if getattr(frame, '__iter__', False):
					fval = sum(frame) / len(frame)
				else:
					fval = frame
				buff.append(fval)
			# TODO: trim to 1024 or so?
			outfft = fft(buff)
			spectrum = [
				(outfft[y].real
					if y < len(outfft) else 0.0)
				for y in range(fft_pixels)]
			points.append(spectrum)
			framepos += len(frames)
	finally:
		f.close()
	# maximise
	return points
Exemple #21
0
    def __create_feature(self, input_path, speaker_name, feature_filename,
                         mode):
        """Extract cepstral features from one recording and store them as HDF5.

        The output goes to ``<features_rootpath>/<speaker_name>/
        <feature_filename>``; the speaker directory is created when missing.

        :param input_path: path of the sound file to read.
        :param speaker_name: sub-directory name below ``self.features_rootpath``.
        :param feature_filename: name of the HDF5 file to write.
        :param mode: unused by this method.
        """
        speaker_featurepath = os.path.join(self.features_rootpath,
                                           speaker_name)
        if not os.path.exists(speaker_featurepath):
            os.mkdir(speaker_featurepath)

        output_path = os.path.join(speaker_featurepath, feature_filename)

        f = Sndfile(input_path)
        try:
            rate = f.samplerate
            data = f.read_frames(f.nframes)
        finally:
            # The original never released the sound file handle
            f.close()
        # NOTE(review): presumably rescales [-1, 1] floats to 16-bit
        # amplitudes for the extractor - confirm.
        original_data = data * pow(2, 15)

        extractor = bob.bio.spear.extractor.Cepstral(win_length_ms=25,
                                                     n_filters=27,
                                                     n_ceps=13,
                                                     with_energy=False,
                                                     mel_scale=True,
                                                     features_mask=np.arange(
                                                         0, 39))
        preprocessor = bob.bio.spear.preprocessor.Energy_Thr()
        __, __, labels = preprocessor((rate, original_data))
        feature = extractor([rate, original_data, labels])

        out_file = bob.io.base.HDF5File(output_path, 'w')
        try:
            extractor.write_feature(feature, out_file)
        finally:
            # Close the HDF5 file even when writing fails
            out_file.close()
Exemple #22
0
def downsample(fs, sig):
    """Resample a signal to ``SAMPLE_RATE`` with the sox ``rate`` effect.

    ``sig`` is zero-padded to a whole number of ``fs * WINDOW_SIZE`` sample
    windows, written to a temporary pcm16 wav file, converted by sox into a
    second temporary file and read back from there.

    :param fs: sample rate of the input signal.
    :param sig: 1-D input signal.
    :returns: the resampled frames.
    """
    def _remove(path):
        # Best-effort cleanup: the file may not exist if an earlier step failed
        if os.path.exists(path):
            os.unlink(path)

    in_file = random_string() + ".wav"
    out_file = random_string() + ".wav"

    frame_len = fs * WINDOW_SIZE
    pad = len(sig) % frame_len
    if pad > 0:
        sig = np.append(sig, np.zeros(frame_len - pad))

    try:
        f = Sndfile(in_file, 'w',
                    Format(type="wav", encoding='pcm16', endianness="file"),
                    1, fs)
        try:
            f.write_frames(sig)
        finally:
            f.close()

        sox_in = pysox.CSoxStream(in_file)
        sox_out = pysox.CSoxStream(out_file,
                                   'w',
                                   pysox.CSignalInfo(SAMPLE_RATE, 1, 8),
                                   fileType='wav')
        try:
            sox_chain = pysox.CEffectsChain(sox_in, sox_out)
            sox_chain.add_effect(pysox.CEffect("rate", [str(SAMPLE_RATE)]))
            sox_chain.flow_effects()
        finally:
            sox_out.close()

        f = Sndfile(out_file, 'r')
        try:
            return f.read_frames(f.nframes)
        finally:
            f.close()
    finally:
        # Temporary files are removed on success and on failure alike
        try:
            _remove(in_file)
        finally:
            _remove(out_file)
def extractData(file_names):
    """Build MFCC inputs and character targets for a set of TIMIT utterances.

    For every audio file the scaled MFCCs (64 filters, 40 coefficients) are
    computed.  The matching ``.TXT`` transcript is lower-cased, stripped of
    periods, its first two fields and any ``special_chars``, and then encoded
    per character: 0 for a space, ``ord(ch) - ord('a') + 1`` otherwise.

    :param file_names: mapping of directory -> iterable of audio file names.
    :returns: ``(data, targets)`` as two parallel lists.
    """
    data = []
    targets = []
    for k, v in file_names.items():
        for f_name in v:
            source_fname = k + "/" + f_name
            target_fname = k + "/" + f_name.split(".")[0] + ".TXT"
            # Re-root both paths under ./TIMIT, dropping the first character
            source_fname = "./TIMIT" + source_fname[1:]
            target_fname = "./TIMIT" + target_fname[1:]

            audio_file = Sndfile(source_fname, "r")
            try:
                sr = audio_file.samplerate
                audio = audio_file.read_frames(audio_file.nframes)
            finally:
                audio_file.close()
            datum = mfcc(audio, samplerate=sr, nfilt=64, numcep=40)
            datum = preprocessing.scale(datum)
            data.append(datum)

            with open(target_fname, "r") as text_file:
                target_txt = ' '.join(text_file.read().lower().strip().replace(
                    ".", "").split()[2:])
            # filter() returns an iterator on Python 3, which has no
            # .replace(); join keeps the result a string on both versions.
            target_txt = ''.join(
                ch for ch in target_txt if ch not in special_chars)
            # Doubling the spaces makes split(' ') emit '' between words
            target_txt = target_txt.replace(' ', '  ').split(' ')
            target = np.hstack(
                ['<space>' if x == '' else list(x) for x in target_txt])
            target = np.asarray(
                [0 if x == '<space>' else ord(x) - (ord('a') - 1)
                 for x in target])
            targets.append(target)
    return data, targets
Exemple #24
0
    def read_wav(self, sample_path):
        """Load a mono or stereo sample and store it on this object via update().

        :param sample_path: path of the sample, appended to ``cwd``.
        :raises ValueError: when the file has more than two channels; no
            ``sample_type`` value is defined for that case.
        """
        sample = Sndfile(cwd + sample_path, 'r')
        try:
            sampling_rate = sample.samplerate
            channels = sample.channels
            encoding = sample.encoding
            frames_count = sample.nframes

            frames = sample.read_frames(frames_count, dtype=np.float32)
        finally:
            sample.close()
        del sample

        if channels == 1:
            text_type = 'mono'
            sample_type = 0
        elif channels == 2:
            text_type = 'stereo'
            sample_type = 0b01100100
        else:
            # sample_type used to stay unbound here, which ended in a
            # NameError at the update() call below.
            raise ValueError(
                'unsupported channel count {0} in sample "{1}"'.format(
                    channels, sample_path))

        if OPTIONS['verbose'] > 1:
            # A single joined string prints identically on Python 2 and 3
            print(" ".join(str(part) for part in (
                "*", encoding, text_type, 'sample "', sample_path, '"',
                4 * frames_count, 'kB')))

        if OPTIONS['play_sound']:
            play(frames.astype(np.float64).T, sampling_rate)

        self.update({
            'sample_data': frames,
            'sample_type': sample_type,
            'channels': 2,
            'sample_bittype': 4
        })
    def file_to_features(self, wavpath):
        """Convert each full frame of a sound file to 13 MFCC coefficients.

        Frames of ``framelen`` samples are read until the file ends.  Frames
        with more than one channel are averaged across channels.  Each frame
        is Hamming-windowed and transformed with an FFT, and the lower half
        of its magnitude spectrum is handed to ``self.mfccMaker``.

        :param wavpath: path of the sound file.
        :returns: ``np.array`` of the per-frame feature vectors.
        """
        sf = Sndfile(wavpath, "r")
        try:
            window = np.hamming(framelen)
            # Integer slice bound; framelen/2 is a float under true division
            halfframe = framelen // 2
            features = []
            while True:
                try:
                    chunk = sf.read_frames(framelen, dtype=np.float32)
                    if len(chunk) != framelen:
                        print("Not read sufficient samples - returning")
                        break
                    if sf.channels != 1:
                        chunk = np.mean(chunk, 1)  # mixdown
                    framespectrum = np.fft.fft(window * chunk)
                    magspec = abs(framespectrum[:halfframe])

                    # do the frequency warping and MFCC computation
                    melSpectrum = self.mfccMaker.warpSpectrum(magspec)
                    melCepstrum = self.mfccMaker.getMFCCs(melSpectrum, cn=True)
                    melCepstrum = melCepstrum[1:]   # exclude zeroth coefficient
                    melCepstrum = melCepstrum[:13]  # limit to lower MFCCs
                    features.append(melCepstrum)

                except RuntimeError:
                    # Treated as end of file
                    break
        finally:
            # Release the handle even if feature extraction fails
            sf.close()
        return np.array(features)
    def file_to_features(self, wavpath):
        """Read a sound file frame by frame and return MFCC features per frame.

        Reading stops at the first short frame or when ``read_frames`` raises
        RuntimeError.  Multi-channel frames are mixed down by averaging.

        :param wavpath: path of the sound file.
        :returns: ``np.array`` holding coefficients 1..13 for every frame.
        """
        features = []
        sf = Sndfile(wavpath, "r")
        try:
            window = np.hamming(framelen)
            needs_mixdown = sf.channels != 1
            while True:
                try:
                    chunk = sf.read_frames(framelen, dtype=np.float32)
                    if len(chunk) != framelen:
                        print("Not read sufficient samples - returning")
                        break
                    if needs_mixdown:
                        chunk = np.mean(chunk, 1)  # mixdown
                    framespectrum = np.fft.fft(window * chunk)
                    # Floor division: a float is not a valid slice bound
                    magspec = abs(framespectrum[:framelen // 2])

                    # do the frequency warping and MFCC computation
                    melSpectrum = self.mfccMaker.warpSpectrum(magspec)
                    melCepstrum = self.mfccMaker.getMFCCs(melSpectrum, cn=True)
                    # exclude zeroth coefficient, limit to lower MFCCs
                    framefeatures = melCepstrum[1:][:13]
                    features.append(framefeatures)

                except RuntimeError:
                    break
        finally:
            # Close on every exit path, not only after a clean loop
            sf.close()
        return np.array(features)
def test_read_wave():
    """Read 46797 frames of ``../fcjf0/sa1.wav`` and display their spectrogram."""
    f = Sndfile("../fcjf0/sa1.wav", 'r')
    try:
        data = f.read_frames(46797)
    finally:
        # The handle was previously left open
        f.close()
    data_arr = np.array(data)
    pyplot.figure()
    pyplot.specgram(data_arr)
    pyplot.show()
    def test_rw(self):
        """Test read/write pointers for seek.

        A file opened in 'rw' mode is filled from the reference file, then
        the read and write pointers are moved together and separately to
        check that each seek mode only moves the pointer(s) it names.
        """
        ofilename = join(TEST_DATA_DIR, 'test.wav')
        rfd, fd, cfilename = open_tmp_file('rwseektest.wav')
        ref = test = None
        try:
            ref = Sndfile(ofilename, 'r')
            test = Sndfile(fd, 'rw', format=ref.format,
                           channels=ref.channels, samplerate=ref.samplerate)
            n = 1024

            rbuff = ref.read_frames(n, dtype = np.int16)
            test.write_frames(rbuff)
            tbuff = test.read_frames(n, dtype = np.int16)

            assert_array_equal(rbuff, tbuff)

            # Test seeking both read and write pointers
            test.seek(0, 0)
            test.write_frames(rbuff)
            tbuff = test.read_frames(n, dtype = np.int16)
            assert_array_equal(rbuff, tbuff)

            # Test seeking only read pointer
            rbuff1 = rbuff.copy()
            rbuff2 = rbuff1 * 2 + 1
            # NOTE(review): clip() returns a new array, so this call has no
            # effect on rbuff2; kept as-is to leave the test data unchanged.
            rbuff2.clip(-30000, 30000)
            test.seek(0, 0, 'r')
            test.write_frames(rbuff2)
            tbuff1 = test.read_frames(n, dtype = np.int16)
            try:
                tbuff2 = test.read_frames(n, dtype = np.int16)
            except IOError as e:
                msg = "write pointer was updated in read seek !"
                msg += "\n(msg is %s)" % e
                raise AssertionError(msg)

            assert_array_equal(rbuff1, tbuff1)
            assert_array_equal(rbuff2, tbuff2)
            if np.all(rbuff2 == tbuff1):
                raise AssertionError("write pointer was updated"\
                        " in read seek !")

            # Test seeking only write pointer
            rbuff3 = rbuff1 * 2 - 1
            # NOTE(review): same no-op clip() as above.
            rbuff3.clip(-30000, 30000)
            test.seek(0, 0, 'rw')
            test.seek(n, 0, 'w')
            test.write_frames(rbuff3)
            tbuff1 = test.read_frames(n, np.int16)
            try:
                assert_array_equal(tbuff1, rbuff1)
            except AssertionError:
                raise AssertionError("read pointer was updated in write seek !")

            # NOTE(review): tbuff3 is read but never compared in the visible
            # code; the snippet looks truncated here - confirm against the
            # full test.
            try:
                tbuff3 = test.read_frames(n, np.int16)
            except IOError as e:
                msg = "read pointer was updated in write seek !"
                msg += "\n(msg is %s)" % e
                raise AssertionError(msg)
        finally:
            # The visible ``try`` had no closing clause.  Release the handles
            # and the temporary file the same way the sibling tests do.
            try:
                for handle in (test, ref):
                    if handle is not None:
                        handle.close()
            finally:
                close_tmp_file(rfd, cfilename)
Exemple #29
0
class AudioFile:
    """Sound file loaded into memory together with its basic metadata.

    The open ``Sndfile`` handle stays available as ``sf``; ``audio`` holds
    all ``length`` frames read from it.
    """

    def __init__(self, file_name):
        """Open ``file_name`` and read every frame it contains."""
        handle = Sndfile(file_name)
        self.sf = handle

        # Metadata copied from the handle
        self.file_format = handle.format
        self.nchans = handle.channels
        self.sr = handle.samplerate

        frame_count = handle.nframes
        self.length = frame_count
        self.audio = handle.read_frames(frame_count)
Exemple #30
0
class AudioFile:
	"""In-memory copy of a sound file plus its format, channels, rate and length.

	``sf`` keeps the ``Sndfile`` handle the data was read from.
	"""

	def __init__(self, file_name):
		"""Open ``file_name`` with Sndfile and load all of its frames."""
		sndfile = Sndfile(file_name)
		self.sf = sndfile

		# Copy the header information onto the instance
		self.file_format = sndfile.format
		self.nchans = sndfile.channels
		self.sr = sndfile.samplerate

		total_frames = sndfile.nframes
		self.length = total_frames
		self.audio = sndfile.read_frames(total_frames)
def load_sound(filename):
    """
    Load a sound file and return its samples together with the sample rate.

    All frames are read as ``np.float64`` and the file is closed before
    returning, even if reading fails.

    INFO: The values are normalized between -1 and 1 (per the original
    author's note; this is a property of the reader, not of this function).
    :param filename: path of the sound file to open for reading
    :return: tuple ``(data, samplerate)``; ``data`` is a numpy array,
        presumably with (sound_length, channels) shape -- confirm for mono
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes, dtype=np.float64)
        samplerate = f.samplerate
    finally:
        # The original never released the handle.
        f.close()
    return data, samplerate
def load(filename):
    """Load an audio file and average over channels.

    Returns a tuple ``(data, rate)``: the samples as a numpy array (a 2-D
    read is collapsed to 1-D by taking the mean over the last axis) and the
    sampling rate reported by the file. The file is closed before returning.

    """
    fh = Sndfile(filename, "r")
    try:
        data = fh.read_frames(fh.nframes)
        rate = fh.samplerate
    finally:
        # Release the handle even when reading raises.
        fh.close()
    if data.ndim == 2:
        data = np.mean(data, axis=-1)
    return data, rate
Exemple #33
0
def CQT(filename, fmin=None, n_bins=84, hop_length=512,nfreqs=None):
    """Compute constant-Q features with deltas and frame energy for a file.

    :param filename: path of the sound file to analyse
    :param fmin: forwarded to ``librosa.cqt``
    :param n_bins: forwarded to ``librosa.cqt``
    :param hop_length: hop used for the CQT and for the energy track
    :param nfreqs: if not None, keep only the first ``nfreqs`` CQT rows
    :return: the transpose of the CQT, its first- and second-order deltas and
        the RMS energy stacked along the first axis
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes)
        sr = f.samplerate
    finally:
        # The original left the file open.
        f.close()
    cqt = librosa.cqt(data, sr=sr, fmin=fmin, n_bins=n_bins, hop_length=hop_length)
    if nfreqs is not None:
        cqt = cqt[:nfreqs,:]
    delta1 = librosa.feature.delta(cqt,order=1)
    delta2 = librosa.feature.delta(cqt,order=2)
    # Use the same hop as the CQT so both have matching frame counts; with the
    # default hop_length (512) this is identical to the previous call.
    energy = librosa.feature.rmse(y=data, hop_length=hop_length)
    features = np.vstack((cqt,delta1,delta2,energy))
    return features.T
Exemple #34
0
def load(filename):
    """Load a wave file and return the signal, sample rate and number of channels.
    
    Can be any format that libsndfile supports, like .wav, .flac, etc.
    The file is closed before returning.
    
    """
    wave_file = Sndfile(filename, 'r')
    try:
        signal = wave_file.read_frames(wave_file.nframes)
        channels = wave_file.channels
        sample_rate = wave_file.samplerate
    finally:
        # Do not leak the handle, even if the read fails.
        wave_file.close()
    return signal, sample_rate, channels
Exemple #35
0
def load(filename):
    """
    Load a wave file and return the signal, sample rate and number of channels.

    Can be any format that libsndfile supports, like .wav, .flac, etc.

    :param filename: path of the file to open for reading
    :return: tuple ``(signal, sample_rate, channels)``
    """
    wave_file = Sndfile(filename, 'r')
    try:
        signal = wave_file.read_frames(wave_file.nframes)
        sample_rate = wave_file.samplerate
        channels = wave_file.channels
    finally:
        # The original never closed the file.
        wave_file.close()
    return signal, sample_rate, channels
Exemple #36
0
def hodorifyIt(inputFile, outputFile, karaokeExt = '.txt'):
	"""Replace the lyrics of a song with alternating 'ho'/'dor' syllables.

	Reads ``inputFile``, loads the karaoke annotation stored next to it (same
	base name with ``karaokeExt``), builds a synthesised track from it, adds
	that track to channels 0 and 1 of the centre-cut audio and writes the
	result to ``outputFile`` using the input's format, channel count and
	sample rate.

	NOTE(review): ``toneMappFile`` is not defined in this function; it is
	presumably a module-level name -- confirm. The mixing step indexes
	channels 0 and 1, so at least two channels are required.
	"""

	#reading input wave file
	inputAudio = Sndfile(inputFile, 'r')
	try:
		audio = inputAudio.read_frames(inputAudio.nframes)
		fs = inputAudio.samplerate
		nChannel = inputAudio.channels
		fileFormat = inputAudio.format
	finally:
		# the original never closed the input file
		inputAudio.close()

	fname, ext = os.path.splitext(inputFile)
	karaokeFile  = fname + karaokeExt

	#parse the karaoke file
	karaokeData = KP.parseKaraokeFile(karaokeFile)

	#assign which syllable to use ho and which ones to use dor
	#('-' entries get no syllable and restart the alternation at 'ho')
	toogle = 0
	sylType = ['ho', 'dor']
	for elem in karaokeData['data']:
		if elem['syl'] == '-':
			toogle = 0
			continue
		elem['sylType'] = sylType[toogle]
		toogle = (toogle + 1) % 2

	dumpSonicVisualizerAnnotFile("tryHODOR.txt", karaokeData['data'])

	#initialize all the hodor locations with not processed flag (later to be for exploiting repetitive hodors)
	for elem in karaokeData['data']:
		elem['processed'] = 0

	#creating mapping between file names and tones
	toneMapp = createToneMappFiles(toneMappFile)

	#processHere the logic for Hodor input file for each word
	karaokeData = hodorFileSelection(karaokeData, toneMapp)

	#do center channel cut
	audio = cutCenterChannel(audio, fs, karaokeData)

	#estimate the possible repetitions in the karaoke data, i.e. output with same note and duration
	print(len(karaokeData['data']))
	repMTX = estimateRepetitiveHodors(karaokeData)

	emptyTrack  = np.zeros(len(audio))
	emptyTrack = generateHodorTrack(emptyTrack, fs, karaokeData, repMTX)

	audio[:,1] = audio[:,1] + emptyTrack
	audio[:,0] = audio[:,0] + emptyTrack

	outputWav = Sndfile(outputFile, 'w', fileFormat, nChannel, fs)
	try:
		outputWav.write_frames(audio)
	finally:
		# close the output even when writing fails
		outputWav.close()
def load_sound(filename):
    """
    Load a sound file and return ``(data, samplerate)``.

    Every frame of the file is read as ``np.float64``; the file is closed
    afterwards, including when the read raises.

    INFO: The values are normalized between -1 and 1 (original author's
    note -- behaviour of the reader, not enforced here).
    :param filename: path of the sound file to open for reading
    :return: tuple of the numpy sample array (presumably
        (sound_length, channels) shaped -- confirm for mono files) and the
        sample rate
    """
    f = Sndfile(filename, 'r')
    try:
        samplerate = f.samplerate
        data = f.read_frames(f.nframes, dtype=np.float64)
    finally:
        # Previously the handle was leaked.
        f.close()
    return data, samplerate
Exemple #38
0
    def _test_read_write(self, dtype):
        """Copy the reference wav chunk by chunk using ``dtype`` and check the copy.

        The reference file is read in blocks of ``nbuff`` frames plus a final
        remainder, written to a temporary wav whose encoding is
        ``_DTYPE_TO_ENC[dtype]``, and then both files are re-read and compared
        block by block (remainder included).
        """
        # dirty !
        ofilename = join(TEST_DATA_DIR, 'test.wav')
        rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
        try:
            nbuff = 22050

            # Open the test file for reading
            a = Sndfile(ofilename, 'r')
            nframes = a.nframes
            # Floor division: range() needs an int under true division too.
            nchunks = nframes // nbuff
            nrem = nframes % nbuff

            # Open the copy file for writing
            format = Format('wav', _DTYPE_TO_ENC[dtype])
            b = Sndfile(fd, 'w', format, a.channels, a.samplerate)

            # Copy the data in the wav file
            for i in range(nchunks):
                tmpa = a.read_frames(nbuff, dtype=dtype)
                assert tmpa.dtype == dtype
                b.write_frames(tmpa)
            # The tail used to be read with the default dtype; use the dtype
            # under test so the whole copy goes through the same path.
            tmpa = a.read_frames(nrem, dtype=dtype)
            b.write_frames(tmpa)

            a.close()
            b.close()

            # Now, reopen both files in for reading, and check data are
            # the same
            a = Sndfile(ofilename, 'r')
            b = Sndfile(cfilename, 'r')
            for i in range(nchunks):
                tmpa = a.read_frames(nbuff, dtype=dtype)
                tmpb = b.read_frames(nbuff, dtype=dtype)
                assert_array_equal(tmpa, tmpb)
            # The remainder was previously copied but never verified.
            tmpa = a.read_frames(nrem, dtype=dtype)
            tmpb = b.read_frames(nrem, dtype=dtype)
            assert_array_equal(tmpa, tmpb)

            a.close()
            b.close()

        finally:
            close_tmp_file(rfd, cfilename)
    def _test_read_write(self, dtype):
        """Round-trip the reference wav through a temporary copy using ``dtype``.

        Frames are copied in blocks of ``nbuff`` (plus the remainder) into a
        temporary wav encoded as ``_DTYPE_TO_ENC[dtype]``; both files are then
        reopened and every block, including the remainder, must compare equal.
        """
        # dirty !
        ofilename = join(TEST_DATA_DIR, 'test.wav')
        rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
        try:
            nbuff = 22050

            # Open the test file for reading
            a = Sndfile(ofilename, 'r')
            nframes = a.nframes
            # divmod keeps the block count an integer regardless of the
            # division semantics in effect.
            nblocks, nrem = divmod(nframes, nbuff)

            # Open the copy file for writing
            format = Format('wav', _DTYPE_TO_ENC[dtype])
            b = Sndfile(fd, 'w', format, a.channels, a.samplerate)

            # Copy the data in the wav file
            for i in range(nblocks):
                tmpa = a.read_frames(nbuff, dtype=dtype)
                assert tmpa.dtype == dtype
                b.write_frames(tmpa)
            # Read the tail with the dtype under test (it used to fall back to
            # the reader's default dtype).
            tmpa = a.read_frames(nrem, dtype=dtype)
            b.write_frames(tmpa)

            a.close()
            b.close()

            # Now, reopen both files in for reading, and check data are
            # the same
            a = Sndfile(ofilename, 'r')
            b = Sndfile(cfilename, 'r')
            for i in range(nblocks):
                tmpa = a.read_frames(nbuff, dtype=dtype)
                tmpb = b.read_frames(nbuff, dtype=dtype)
                assert_array_equal(tmpa, tmpb)
            # Also compare the tail, which was never checked before.
            assert_array_equal(a.read_frames(nrem, dtype=dtype),
                               b.read_frames(nrem, dtype=dtype))

            a.close()
            b.close()

        finally:
            close_tmp_file(rfd, cfilename)
    def offs(self, track1, track2):
        """
        offs(audiofile track1, audiofile track2)
        calculates the head offset between two (supposedly) otherwise identical audio files
        this is achieved via finding the peak-to-peak difference of the waveform heads

        Returns the index of the largest absolute sample in the head of track1
        minus that of track2, or None (after printing a message) when either
        file cannot be opened. Both files are closed before returning.

        NOTE(review): only track1 is UTF-8 encoded before being opened;
        track2 is passed through unchanged -- confirm whether that asymmetry
        is intended.
        """

        # opens files for reading
        try:
            track_one_file_obj = Sndfile(track1.encode('utf-8'), 'r')
        except Exception:
            print('Corrupted File 1 : '+ track1)
            return

        try:
            track_two_file_obj = Sndfile(track2, 'r')
        except Exception:
            # do not leak the first handle when the second open fails
            track_one_file_obj.close()
            print('Corrupted File 2 : '+ track2)
            return

        try:
            # calculates the head of each file (first twentieth of the waveform)
            # if that is fewer frames than 5 seconds of audio (that is, the
            # waveform is under 100 seconds long) the whole file is used instead
            track_one_file_obj_head = floor(.05 * track_one_file_obj.nframes)
            if track_one_file_obj_head < (track_one_file_obj.samplerate * 5):
                track_one_file_obj_head = track_one_file_obj.nframes

            track_two_file_obj_head = floor(.05 * track_two_file_obj.nframes)
            if track_two_file_obj_head < (track_two_file_obj.samplerate * 5):
                track_two_file_obj_head = track_two_file_obj.nframes

            # reads the head of each file (as absolute values, accounting for reversed waveforms)
            # and passes it through self.mono
            numpy_matrix_of_track1 = self.mono(np.absolute(track_one_file_obj.read_frames(track_one_file_obj_head)))
            numpy_matrix_of_track2 = self.mono(np.absolute(track_two_file_obj.read_frames(track_two_file_obj_head)))
        finally:
            track_one_file_obj.close()
            track_two_file_obj.close()

        # returns the difference between the peak of each list
        return np.argmax(numpy_matrix_of_track1) - np.argmax(numpy_matrix_of_track2)
def analysefile(path, hopsize=0.5, mode='ch', numtop=1, framesize = 1024, chrm_kwargs=None, maxdursecs=None):
	"""Analyses an audio file from disk, dividing into lapped frames and returning an array holding [raw, peaks, slopecent] for each frame.
	Can also do plain FFT-type analysis as an alternative.

	Parameters:
		path        -- mono sound file to analyse
		hopsize     -- hop as a fraction of the frame size
		mode        -- 'ch' or 'fft'
		numtop      -- forwarded to ``analyseframeplusfeatures``
		framesize   -- analysis frame size handed to ``Chirpletringmod``
		chrm_kwargs -- optional extra keyword arguments for ``Chirpletringmod``
		               (copied; 'samplerate' and 'framesize' are overwritten)
		maxdursecs  -- optional duration limit in seconds (see note below)

	Returns a dict with keys 'ch', 'frames', 'srate', 'hopsize', 'framesize'.

	Raises ValueError for an unknown mode or a missing path.
	"""
	if (mode != 'ch') and (mode != 'fft'):
		raise ValueError('Mode %s not recognised' % mode)
	if not os.path.isfile(path):
		raise ValueError("path %s not found" % path)
	sf = Sndfile(path, "r")
	try:
		if sf.channels != 1:
			# NOTE(review): ``Error`` is not defined in this block; presumably a module-level exception -- confirm.
			raise Error("ERROR in chirpletringmod: sound file has multiple channels (%i) - mono audio required." % sf.channels)
		srate = sf.samplerate
		if maxdursecs is not None:
			maxdurspls = maxdursecs * srate
		else:
			maxdurspls = sf.nframes

		if chrm_kwargs is not None:
			chrm_kwargs = deepcopy(chrm_kwargs)
			chrm_kwargs['samplerate'] = srate
			chrm_kwargs['framesize']  = framesize
		else:
			chrm_kwargs = {'samplerate':srate, 'framesize':framesize}

		ch = chirpletringmod.Chirpletringmod(**chrm_kwargs)

		ihop = int(hopsize * ch.framesize)
		numspecframes = sf.nframes // ihop
		print("File contains %i spectral frames" % numspecframes)
		storeraw = numspecframes < 500
		frames = []
		moretocome = True
		data = zeros(ch.framesize, float32)
		while moretocome:
			try:
				nextdata = sf.read_frames(ihop, dtype=float32)
			except RuntimeError:
				# Treated as EOF. Stop here: the previous code fell through and
				# re-analysed the stale hop from the last iteration (or hit an
				# unbound name when the very first read failed).
				break
			if len(nextdata) != ihop:
				print("data truncated, detected EOF")
				moretocome = False
				# pad in float32 so the frame keeps the dtype of every other frame
				nextdata = hstack((nextdata, zeros(ihop - len(nextdata), float32)))
			data = hstack(( data[ihop:],  nextdata ))

			frames.append(ch.analyseframeplusfeatures(data, hopsize, mode, numtop, storeraw))

			# NOTE(review): len(data) always equals ch.framesize here, so this
			# only fires when the frame is at least as long as the limit; it
			# probably should compare the number of samples read so far --
			# confirm before changing, since that would alter frame counts.
			if len(data) >= maxdurspls:
				break
	finally:
		# close the file on every exit path, including the errors above
		sf.close()

	return {'ch':ch, 'frames':frames, 'srate':srate, 'hopsize':hopsize, 'framesize':ch.framesize}   # the ch knows srate and framesize, why are we duplicating?
Exemple #42
0
def offs(track1, track2):
    """Return the head offset, in frames, between two sound files.

    For each file the index of the largest absolute sample within its head is
    located (after passing the head through ``mono``); the result is the peak
    index of ``track1`` minus that of ``track2``. Both files are closed before
    returning.
    """
    # opens files for reading
    s1 = Sndfile(track1, 'r')
    try:
        s2 = Sndfile(track2, 'r')
        try:
            # calculates the head of each file (first twentieth of the waveform)
            # if that is fewer frames than 5 seconds of audio (that is, the
            # waveform is under 100 seconds long) the whole file is used instead
            s1head = floor(.05 * s1.nframes)
            if s1head < (s1.samplerate * 5):
                s1head = s1.nframes
            s2head = floor(.05 * s2.nframes)
            if s2head < (s2.samplerate * 5):
                s2head = s2.nframes

            # reads the head of each file (as absolute values, accounting for reversed waveforms)
            # and passes it through the mono function
            t1 = mono(np.absolute(s1.read_frames(s1head)))
            t2 = mono(np.absolute(s2.read_frames(s2head)))
        finally:
            s2.close()
    finally:
        # the original never closed either file
        s1.close()

    # returns the difference between the peak of each list
    return np.argmax(t1) - np.argmax(t2)
def offs(track1, track2):
	"""Return the head offset, in frames, between two sound files.

	The head of each file is read, made absolute and passed through ``mono``;
	the result is the position of the maximum in ``track1``'s head minus the
	position of the maximum in ``track2``'s head. Both files are closed
	before returning.
	"""
	# opens files for reading
	s1 = Sndfile(track1, 'r')
	try:
		s2 = Sndfile(track2, 'r')
		try:
			# calculates the head of each file (first twentieth of the waveform)
			# if that is fewer frames than 5 seconds of audio (that is, the
			# waveform is under 100 seconds long) the whole file is used instead
			s1head = floor(.05 * s1.nframes)
			if s1head < (s1.samplerate * 5):
				s1head = s1.nframes
			s2head = floor(.05 * s2.nframes)
			if s2head < (s2.samplerate * 5):
				s2head = s2.nframes

			# reads the head of each file (as absolute values, accounting for reversed waveforms)
			# and passes it through the mono function
			t1 = mono(np.absolute(s1.read_frames(s1head)))
			t2 = mono(np.absolute(s2.read_frames(s2head)))
		finally:
			s2.close()
	finally:
		# the original never closed either file
		s1.close()

	# returns the difference between the peak of each list
	return np.argmax(t1) - np.argmax(t2)
def plotSpectrogram(f,mode,channel):
    """Render the spectrogram of one channel of a sound file to a PNG.

    :param f: path of the sound file; the image name is ``f`` without its
        last four characters, followed by ``CH<channel>.png``
    :param mode: unused by this function
    :param channel: column index of the channel to plot

    Up to 59.8 seconds of audio are read. When the file cannot be read or
    plotted, "Could not process ..." and "ERROR: ..." are printed and no image
    is written. On success the image path is appended to ``listN``
    (NOTE(review): ``listN`` is not defined here; presumably a module-level
    list -- confirm).
    """
    plt.close('all')
    # Drop the last four characters (the '.wav' extension) to get the name
    name = f[:len(f) - 4]
    print("Processing: %s CH %s" % (name, channel))
    plt.figure(figsize=(10.5,3), dpi=100)

    # Stays None when reading or plotting fails, so the save step can be
    # skipped explicitly (the original relied on a NameError for that).
    fig = None
    try:
        r = Sndfile(f)
        try:
            Fs = r.samplerate
            # 59.8 seconds worth of frames, starting at the beginning
            sample = r.read_frames(59.8 * Fs)
        finally:
            r.close()
        # Spectrogram settings derived from the sample rate
        NFFT = int(Fs*0.05)
        noverlap = int(Fs*0.0025)
        # Colourmap values that work well are: 'binary','bone' and 'jet'
        fig = plt.specgram(sample[:,channel],Fs=Fs, NFFT=NFFT,noverlap=noverlap,
                           cmap=plt.get_cmap('jet'))
    except Exception:
        print("Could not process %s" % name)
        plt.figtext(0.5,0.5,"ERROR")

    plt.title("CH"+str(channel)+" : "+f)
    plt.xticks([],[])  #gets rid of the x ticks and numbers
    plt.yticks([],[])  #gets rid of the y ticks and numbers

    if fig is None:
        # Same message the original produced on this path.
        print("ERROR: %s" % name)
        return

    try:
        plt.savefig(name+"CH"+str(channel)+".png",fig=fig, bbox_inches='tight')  #Save the results
        # the spectrogram was generated: record its image path
        namePNG = "./" + name+"CH"+str(channel) + ".png"
        listN.append(namePNG)
    except Exception:
        print("ERROR: %s" % name)
def decompose(inpath, outdir='output', niters=3, framesize=1024, hopsize=0.5, writefiles=True, wintype='hann'):
	"""Iteratively decompose a mono sound file into atoms.

	Runs ``niters`` rounds of ``Chirpletringmod.decompose_oneiter``, feeding
	each round the residual of the previous one (the first round gets the
	whole signal). When ``writefiles`` is true, each round writes three wav
	files under ``outdir``: that round's resynthesis, the running total of
	all resyntheses, and the residual.

	Returns a dict with the analyser ('ch'), the peaks gathered over all
	rounds ('peaks') and the final residual ('residual').

	Raises ValueError when ``inpath`` is not an existing file.
	"""
	if not os.path.isfile(inpath):
		raise ValueError("path %s not found" % inpath)
	infile = Sndfile(inpath, "r")
	if infile.channels != 1:
		raise Error("ERROR in chirpletringmod: sound file has multiple channels (%i) - mono audio required." % infile.channels)

	ch = chirpletringmod.Chirpletringmod(samplerate=infile.samplerate, framesize=framesize, wintype=wintype)
	signal = infile.read_frames(infile.nframes, dtype=float32)
	infile.close()

	# Output names are built from the input's base name without extension.
	stem = os.path.splitext(os.path.basename(inpath))[0]
	outnamestem = "%s/%s" % (outdir, stem)

	resynthtot = zeros(len(signal))
	aggpeaks = []
	residual = signal
	print("chf.decompose: original signal energy %g" % sum(signal ** 2))
	for whichiter in range(niters):
		print("----------------------------------------")
		print("iteration %i" % whichiter)

		# One round: the result carries the peaks found ('peaks'), the
		# resynthesised version ('resynth') and the residual ('residual').
		iterdata = ch.decompose_oneiter(residual, hopsize=hopsize)

		resynthtot += iterdata['resynth']
		aggpeaks.extend(iterdata['peaks'])

		if writefiles:
			outputs = (
				("%s_%i_resynth.wav", iterdata['resynth']),
				("%s_%i_resynthtot.wav", resynthtot),
				("%s_%i_residual.wav", iterdata['residual']),
			)
			for template, samples in outputs:
				outfile = Sndfile(template % (outnamestem, whichiter), "w", Format(), 1, ch.sr)
				outfile.write_frames(samples)
				outfile.close()

		# fodder for next iter
		residual = iterdata['residual']

		print("resynth    signal energy %g" % sum(iterdata['resynth'] ** 2))
		print("resynthtot signal energy %g" % sum(resynthtot ** 2))
		print("residual   signal energy %g" % sum(residual   ** 2))

	return {'ch':ch, 'peaks':aggpeaks, 'residual':residual}
Exemple #46
0
def main():
   """Plot the waveform and sliced spectrogram of every file in testSamples/.

   For each file matched by ``testSamples/*`` the audio is read in full, a
   spectrogram is computed with half-frame overlap, and two subplots are
   shown: the scaled waveform and the result of ``getSlices``.

   NOTE(review): ``frameSize`` and ``getSlices`` are not defined here;
   presumably module-level names -- confirm.
   """
   audioFiles = glob.glob("testSamples/*")
   for audioFile in audioFiles:
      snd = Sndfile(audioFile, "r")
      try:
         nframes = snd.nframes
         data = snd.read_frames(nframes)
         fs = snd.samplerate
      finally:
         # the original left every file open
         snd.close()
      # floor division keeps noverlap an integer under true division as well
      (frames, freqs, bins, ax) = mp.specgram(data, frameSize,
            noverlap=(frameSize//2), Fs=fs)
      mp.subplot(211)
      mp.plot(np.linspace(0,float(nframes)/fs, nframes),
            data * 10000+10000, alpha=0.4)
      mp.subplot(212)
      mp.plot(bins[0:-1], getSlices(frames, bins, 20))
      mp.show()
Exemple #47
0
def CQT_stacked(filename, fmin=None, n_bins=84, hop_length=512,nfreqs=None):
    """Compute a constant-Q transform and its deltas as three stacked planes.

    :param filename: path of the sound file to analyse
    :param fmin: forwarded to ``librosa.cqt``
    :param n_bins: forwarded to ``librosa.cqt``
    :param hop_length: forwarded to ``librosa.cqt``
    :param nfreqs: if not None, keep only the first ``nfreqs`` CQT rows
    :return: array of shape (3, L, d) holding the transposed CQT, first-order
        delta and second-order delta, where (d, L) is the CQT's shape
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes)
        sr = f.samplerate
    finally:
        # The original left the file open.
        f.close()
    cqt = librosa.cqt(data, sr=sr, fmin=fmin, n_bins=n_bins, hop_length=hop_length)
    if nfreqs is not None:
        cqt = cqt[:nfreqs,:]
    delta1 = librosa.feature.delta(cqt,order=1)
    delta2 = librosa.feature.delta(cqt,order=2)
    d,L    = cqt.shape
    # Turn each (d, L) matrix into one (1, L, d) plane before stacking.
    cqt = cqt.T.reshape(1,L,d)
    delta1 = delta1.T.reshape(1,L,d)
    delta2 = delta2.T.reshape(1,L,d)
    features = np.vstack((cqt,delta1,delta2))
    return features
def readwavefile(inputwav):
	"""Read a mono 44.1 kHz sound file and return its samples as float32.

	Any other sample rate or channel count prints a message and terminates
	the process through ``exit(1)``.
	"""
	snd = Sndfile(inputwav, 'r')
	# Only 44100 Hz input is accepted.
	if snd.samplerate != 44100:
		print('only 44.1kHz filess are supported at present')
		exit(1)
	# Only single-channel input is accepted.
	if snd.channels != 1:
		print('only 1 channel supported at present')
		exit(1)
	samples = snd.read_frames(snd.nframes, dtype=np.float32)
	snd.close()
	return samples
Exemple #49
0
def logmel(filename,n_fft=2048,hop_length=512,nfreqs=None):
    """Compute log-mel features with frame energy and deltas for a file.

    :param filename: path of the sound file to analyse
    :param n_fft: forwarded to ``librosa.feature.melspectrogram``
    :param hop_length: hop used for the mel spectrogram and the energy track
    :param nfreqs: if not None, keep only the first ``nfreqs`` mel rows
    :return: the transpose of (log-mel + energy), its first-order delta and
        its second-order delta stacked along the first axis
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes)
        sr = f.samplerate
    finally:
        # The original left the file open.
        f.close()
    melspectrogram = librosa.feature.melspectrogram(y=data, sr=sr, n_fft=n_fft, hop_length=hop_length)
    # Named logmel_spec so the local no longer shadows this function.
    logmel_spec = librosa.core.logamplitude(melspectrogram)
    if nfreqs is not None:
        logmel_spec = logmel_spec[:nfreqs,:]
    # Same hop as the spectrogram so the frame counts line up; identical to
    # the previous call when hop_length is left at its default (512).
    energy = librosa.feature.rmse(y=data, hop_length=hop_length)
    spectr = np.vstack((logmel_spec,energy))
    delta1 = librosa.feature.delta(spectr,order=1)
    delta2 = librosa.feature.delta(spectr,order=2)

    features = np.vstack((spectr,delta1,delta2))
    return features.T
Exemple #50
0
def logmel_stacked(filename,n_fft=2048,hop_length=512,nfreqs=None):
    """Compute a log-mel spectrogram and its deltas as three stacked planes.

    :param filename: path of the sound file to analyse
    :param n_fft: forwarded to ``librosa.feature.melspectrogram``
    :param hop_length: forwarded to ``librosa.feature.melspectrogram``
    :param nfreqs: if not None, keep only the first ``nfreqs`` mel rows
    :return: array of shape (3, L, d) holding the transposed log-mel
        spectrogram, first-order delta and second-order delta, where (d, L)
        is the log-mel spectrogram's shape
    """
    f = Sndfile(filename, 'r')
    try:
        data = f.read_frames(f.nframes)
        sr = f.samplerate
    finally:
        # The original left the file open.
        f.close()
    melspectrogram = librosa.feature.melspectrogram(y=data, sr=sr, n_fft=n_fft, hop_length=hop_length)
    # Named logmel_spec so it does not shadow the sibling function ``logmel``.
    logmel_spec = librosa.core.logamplitude(melspectrogram)
    if nfreqs is not None:
        logmel_spec = logmel_spec[:nfreqs,:]
    delta1 = librosa.feature.delta(logmel_spec,order=1)
    delta2 = librosa.feature.delta(logmel_spec,order=2)
    d,L    = logmel_spec.shape
    # Turn each (d, L) matrix into one (1, L, d) plane before stacking.
    logmel_spec = logmel_spec.T.reshape(1,L,d)
    delta1 = delta1.T.reshape(1,L,d)
    delta2 = delta2.T.reshape(1,L,d)
    features = np.vstack((logmel_spec,delta1,delta2))
    return features
Exemple #51
0
def convert(filename, name):
    """Re-write a sound file as a wav at a fixed location and return its path.

    :param filename: path of the sound file to read
    :param name: unused by this function
    :return: the hard-coded path of the wav file that was written

    The output keeps the input's channel count and sample rate. Both files
    are closed before returning.
    """
    source = Sndfile(filename, 'r')
    try:
        fs = source.samplerate
        nc = source.channels
        data = source.read_frames(source.nframes)
    finally:
        # The original rebound the variable and never closed the input.
        source.close()

    new_name = '/home/bitnami/apps/django/django_projects/Project/sonic_bar_code/static/newfile.wav'
    wav_format = Format('wav')
    target = Sndfile(new_name, 'w', wav_format, nc, fs)
    try:
        target.write_frames(data)
    finally:
        target.close()

    return new_name
def process_recording(filename,
                      window_width=.03,
                      window_spacing=.02,
                      num_coeffs=40,
                      mel_encode=True):
    """Compute filter-bank coefficients and a magnitude spectrogram for a file.

    :param filename: path of the sound file to analyse
    :param window_width: analysis window length in seconds
    :param window_spacing: distance between window centres in seconds
    :param num_coeffs: number of triangular filters
    :param mel_encode: when true, the filter edges are passed through
        ``mel_transform``
    :return: tuple ``(coeffs, spectragram)``, transposed so the window index
        is the last axis

    NOTE(review): ``mel_transform`` and ``triangle`` are not defined here;
    presumably module-level helpers -- confirm.
    """
    f = Sndfile(filename, 'r')
    try:
        fs = f.samplerate
        data = f.read_frames(f.nframes)
    finally:
        # The original never closed the file.
        f.close()

    samples = int(fs * window_width)
    hop = fs * window_spacing
    num_windows = int((len(data) / hop)) - 1
    freqs = np.fft.rfftfreq(samples, d=1. / fs)

    # Index from which the spectrum bins are kept (floor division so the
    # value stays a valid index under true division).
    half = len(freqs) // 2
    if len(freqs) % 2 == 1:
        idx = half
    else:
        idx = half - 1
    pos_freqs = freqs[idx:]

    spectragram = np.empty((num_windows, len(pos_freqs)))

    # NOTE(review): the original added int(math.ceil(samples / 2)) on the
    # right-hand side, which under integer division is the same as
    # samples // 2 -- so for odd ``samples`` the window is one sample short.
    # That behaviour is kept; confirm before widening the window.
    half_window = samples // 2
    for i in range(num_windows):
        centre = (i + 1) * hop
        # Slice bounds must be integers; floats are truncated explicitly.
        left = int(centre - half_window)
        right = int(centre + half_window)
        window = data[left:right]
        spectragram[i] = np.abs(np.fft.rfft(window)[idx:])

    edges = np.linspace(0, fs / 2., num=(num_coeffs + 2))
    if mel_encode:
        edges = mel_transform(edges)
    # Zero-initialised: entries outside each triangle must contribute nothing.
    # The original used np.empty, leaving them as uninitialised memory.
    filter_bank = np.zeros((num_coeffs, len(pos_freqs)))
    for i in range(num_coeffs):
        for j in range(len(pos_freqs)):
            if edges[i] <= pos_freqs[j] <= edges[i + 2]:
                filter_bank[i, j] = triangle(edges[i], edges[i + 1],
                                             edges[i + 2], pos_freqs[j])

    coeffs = np.empty((num_windows, num_coeffs))
    for i in range(num_windows):
        coeffs[i] = filter_bank.dot(spectragram[i])

    return np.transpose(coeffs), np.transpose(spectragram)
Exemple #53
0
    def test_float_frames(self):
        """ Check nframes can be a float"""
        rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
        try:
            # Create a 16-bit PCM wav opened for both reading and writing
            wav_format = Format('wav', 'pcm16')
            snd = Sndfile(fd, 'rw', wav_format, channels=1, samplerate=22050)

            # Write 1000 random shorts, then rewind and flush
            samples = np.random.random_integers(-100, 100, 1000).astype(np.short)
            snd.write_frames(samples)
            snd.seek(0)
            snd.sync()

            # The frame count is deliberately given as a float
            snd.read_frames(1e2, dtype=np.short)
            snd.close()

        finally:
            close_tmp_file(rfd, cfilename)
    def file_to_features(self, wavpath):
        """Reads through a WAV file, converting each frame to the required features. Returns a 2D array.

        Windows of ``framelen // 2`` frames are read with a hop of a quarter
        of ``framelen``; multi-channel chunks are averaged to mono. Each
        window is Hamming-weighted, transformed, warped to a mel spectrum and
        reduced to MFCCs 1..13 by ``self.mfccMaker``.

        Raises ValueError when the path does not exist or the file's sample
        rate differs from the expected ``fs``.

        NOTE(review): ``verbose``, ``framelen`` and ``fs`` are not defined
        here; presumably module-level settings -- confirm.
        """
        if verbose:
            self.count = self.count + 1
            print("Reading %s :" % wavpath)
        if not os.path.isfile(wavpath):
            raise ValueError("path %s not found" % wavpath)
        sf = Sndfile(wavpath, "r")
        try:
            if sf.samplerate != fs:
                raise ValueError("wanted sample rate %g - got %g." %
                                 (fs, sf.samplerate))
            # Floor division keeps the counts integral under true division.
            winlen = framelen // 2
            window = np.hamming(winlen)  #check here
            features = []
            while True:
                try:
                    # read each window sized value from the audio
                    chunk = sf.read_frames(winlen, dtype=np.float32)
                    # take the current pointer backward for overlap
                    sf.seek(-framelen // 4, 1)
                    if len(chunk) != winlen:
                        print("Not read sufficient samples - returning")
                        break
                    if sf.channels != 1:
                        chunk = np.mean(chunk, 1)  # mixdown
                    # first the window type is implemented then the padding is done here
                    framespectrum = np.fft.fft(window * chunk, winlen)
                    magspec = abs(framespectrum[:winlen])

                    # do the frequency warping and MFCC computation
                    melSpectrum = self.mfccMaker.warpSpectrum(magspec)
                    melCepstrum = self.mfccMaker.getMFCCs(melSpectrum, cn=True)
                    melCepstrum = melCepstrum[1:]  # exclude zeroth coefficient
                    melCepstrum = melCepstrum[:13]  # limit to lower MFCCs

                    features.append(melCepstrum)
                except RuntimeError:
                    # a RuntimeError anywhere in the frame ends the scan
                    break
        finally:
            # close the file on every exit path, including the errors above
            sf.close()
        return np.array(features)
Exemple #55
0
def analyze(filename):
    """Print a report of the properties of a sound file, channel by channel.

    The file is read in full; general information (format, channel count,
    sampling rate, number of samples, length) is collected first, followed by
    the output of ``properties`` for each channel. Identical stereo channels
    are reported once. The report is handed to ``display``.
    """
    wave_file = Sndfile(filename, 'r')
    signal = wave_file.read_frames(wave_file.nframes)
    channels = wave_file.channels
    sample_rate = wave_file.samplerate
    header = 'dBFS values are relative to a full-scale square wave'

    # General information about the file
    results = []
    results.append('Properties for "' + filename + '"')
    results.append(str(wave_file.format))
    results.append('Channels:\t%d' % channels)
    results.append('Sampling rate:\t%d Hz' % sample_rate)
    results.append('Samples:\t%d' % wave_file.nframes)
    results.append('Length: \t' + str(wave_file.nframes / sample_rate) + ' seconds')
    results.append('-----------------')

    wave_file.close()

    if channels == 1:
        # Monaural
        results += properties(signal, sample_rate)
    elif channels == 2:
        # Stereo
        identical = array_equal(signal[:, 0], signal[:, 1])
        if identical:
            results += ['Left and Right channels are identical:']
            results += properties(signal[:, 0], sample_rate)
        else:
            results += ['Left channel:']
            results += properties(signal[:, 0], sample_rate)
            results += ['Right channel:']
            results += properties(signal[:, 1], sample_rate)
    else:
        # Multi-channel: one section per channel, numbered from 1
        for ch_no, channel in enumerate(signal.transpose(), 1):
            results += ['Channel %d:' % ch_no]
            results += properties(channel, sample_rate)

    display(header, results)

    # Histogram plotting is switched off
    plot_histogram = False
    if plot_histogram:
        histogram(signal)
Exemple #56
0
def load_wav(fname, rate=None):
    """Load a sound file as a 2-D array and optionally resample it.

    :param fname: path of the sound file to read
    :param rate: target sample rate; None keeps the file's own rate
    :return: tuple ``(signal, rate)`` where ``signal`` has shape
        (frames, channels) and ``rate`` is the sample rate of ``signal``
    """
    fp = Sndfile(fname, 'r')
    try:
        _signal = fp.read_frames(fp.nframes)
        # Always 2-D from here on, including for mono files.
        _signal = _signal.reshape((-1, fp.channels))
        _rate = fp.samplerate
    finally:
        # The original never closed the file.
        fp.close()

    if rate is not None and rate != _rate:
        signal = resampy.resample(_signal,
                                  _rate,
                                  rate,
                                  axis=0,
                                  filter='kaiser_best')
    else:
        signal = _signal
        rate = _rate

    return signal, rate
Exemple #57
0
def convertAndRemoveVoice(inputfile):
    """Convert a file to stereo 44.1 kHz wav with ffmpeg and remove the vocals.

    :param inputfile: file name relative to the current working directory;
        its last four characters are replaced by ``.wav`` for the converted
        file
    :return: 1 on success, 0 when the ffmpeg conversion fails

    The output is written next to the converted wav with the suffix
    ``_VocalRemoved.wav`` and holds half the difference of channels 0 and 1
    as a single channel.
    """
    song_folder = os.getcwd() + '/'

    print(inputfile[-3:])
    mp3_file = song_folder + inputfile
    wav_file = song_folder + inputfile[:-4] + '.wav'  #'song.wav'
    # Argument list instead of a shell string: file names containing quotes or
    # shell metacharacters can no longer alter the command.
    command = ["ffmpeg", "-i", mp3_file, "-y", "-ac", "2", "-ar", "44100", wav_file]
    print(' '.join(command))
    try:
        p = subprocess.Popen(command,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             close_fds=True)
        p.communicate()
    except Exception:
        print('wav conversion problem')
        return 0
    if p.returncode != 0:
        # ffmpeg ran but failed; previously this surfaced later as an error
        # while opening the missing or broken wav file.
        print('wav conversion problem')
        return 0

    original_wav = Sndfile(wav_file, 'r')
    try:
        audio = original_wav.read_frames(original_wav.nframes)
        wav_format = original_wav.format
        samplerate = original_wav.samplerate
    finally:
        original_wav.close()

    outputAudio = (audio[:, 0] - audio[:, 1]) / 2
    print(2)
    # wav_file always ends in '.wav'; replace only that suffix rather than
    # every '.wav' occurrence in the path.
    new_filename = wav_file[:-4] + '_VocalRemoved.wav'
    print(new_filename)
    output_wav = Sndfile(new_filename, 'w', wav_format, 1, samplerate)
    try:
        output_wav.write_frames(outputAudio)
    finally:
        output_wav.close()
    return 1
Exemple #58
0
def getWordSegmentation(path):
    """
    Function to get the segmented frame data of the audio wave and its
    corresponding segmented word transcription

    Walks ``path`` for files with the extension ``.WAV``; for each one the
    audio is read and the sibling ``.WRD`` file (lines of
    ``start end word``) is used to cut the audio into per-word slices.
    The slices are dumped to ``frame_data.npy`` with joblib and the
    ``(word, (start, end))`` tuples are pickled to ``word_seg.p``.
    """

    f_names = []
    frame_data = []
    word_seg = []
    # Find all .WAV files; splitext copes with names that contain no dot or
    # several dots, which made the previous two-way split raise.
    for root, dirs, files in os.walk(path):
        for f_name in files:
            stem, ext = os.path.splitext(f_name)
            if ext == ".WAV":
                f_names.append(os.path.join(root, stem))

    for f_name in f_names:
        # Get audio
        audio_file = Sndfile(f_name + ".WAV", "r")
        try:
            audio = audio_file.read_frames(audio_file.nframes)
        finally:
            audio_file.close()

        # Get transcription
        with open(f_name + ".WRD", "r") as wrd_file:
            word_segmentation = wrd_file.read().strip().split("\n")
        temp_seg = []
        temp_frames = []
        for line in word_segmentation:
            # Get corresponding time frame of audio wave
            start_time, end_time, word = line.split(" ")
            temp_seg.append((word, (start_time, end_time)))
            frame = audio[int(start_time):int(end_time)]
            temp_frames.append(frame)

        frame_data.append(temp_frames)
        word_seg.append(temp_seg)

    # Store all the data
    print("Taking a dump..")
    joblib.dump(np.asarray(frame_data), "frame_data.npy")
    # Binary mode: pickle output is bytes and the handle is now closed.
    with open("word_seg.p", "wb") as seg_file:
        pickle.dump(word_seg, seg_file)