Beispiel #1
0
    def new_numeral_captcha_on_words(self, fname):
        """Create a numeral captcha spoken over random words and write it out.

        A random word string and a random numeral string are synthesised to
        audio files, their sample data is mixed using ``self.noise_vol`` as
        the gain of the words and ``self.captcha_vol`` as the gain of the
        numerals, and the mix is written to ``self.outputdir + fname``.

        :param fname: file name appended verbatim to ``self.outputdir``.
        :return: tuple ``(output filename, prettify(numeral string))``.
        """
        # The two random draws are made in this order: words, then numerals.
        distractor_text = wordstrgen.get_random_wordstr(self.wordbank,
                                                        self.nwords)
        answer_text = wordstrgen.get_random_numstr(self.nnums)

        # make_audio returns the file names of the synthesised audio.
        ensure_dir('temp')
        distractor_path = speechsynth.make_audio(distractor_text, 'words',
                                                 './temp/')
        answer_path = speechsynth.make_audio(answer_text, 'nums', './temp/')

        # Only the sample data is used; rate and encoding are discarded.
        distractor_samples, _, _ = wavread(distractor_path)
        answer_samples, _, _ = wavread(answer_path)

        distractor_samples = ensure_equal_length(distractor_samples,
                                                 answer_samples)

        # Weighted mix of the two signals.
        mixed = (self.noise_vol * distractor_samples
                 + self.captcha_vol * answer_samples)

        target = self.outputdir + fname

        # Remove any earlier file stored under the same name.
        if os.path.exists(target):
            os.remove(target)

        wavwrite(mixed, target, 22050)

        # Output file name plus the expected answer.
        return target, prettify(answer_text)
Beispiel #2
0
def main():
    """Run the flanger demo on 'amb.wav', modulated by a 1 Hz sine.

    ``fs`` is not defined in this function; it is read from the enclosing
    (presumably module-level) scope.
    """
    # Load the sound files, keeping only the sample data of each.
    # snd and kick are only referenced by the disabled demos listed below.
    snd = wavread('trumpet.wav')[0]
    kick = wavread('kick.wav')[0]
    amb = wavread('amb.wav')[0]
    amb = amb * 0.8  # take a little gain off this sound file

    print(len(amb))

    # Other demos, disabled:
    #low_demo(snd, 10., 500.)
    #high_demo(snd, 10000., 10.)
    #allpass_demo(snd, 1000, -find_c(1000., fs), find_c(1000., fs), 1.0)
    #iir_comb_demo(kick, 100, 0.5, -0.5)

    # Time axis with a step of one sample period, then the sine modulator.
    duration = len(amb) / fs
    time_axis = arange(0.0, duration, 1.0 / fs)
    mod_freq = 1
    modulator = sin(2. * pi * mod_freq * time_axis)
    # Alternative modulator (linear ramp), disabled:
    #modulator = linspace(1, -1, len(amb))

    delayTime = 2.0
    width = 1.0

    # Alternative parameter sets, disabled:
    #var_allpass_demo(snd, delayTime / 1000., width / 1000., -find_c(8000, fs), find_c(8000, fs), 1.0, modulator)
    #var_allpass_demo(amb, delayTime / 1000., width / 1000., 0.5, -0.5, 0.0, modulator)

    # flanger
    var_allpass_demo(amb, delayTime, width, 0.7, 0.7, 0.7, modulator)
Beispiel #3
0
def main():
    """Run the flanger demo on 'amb.wav', modulated by a 1 Hz sine.

    ``fs`` is not defined in this function; it is read from the enclosing
    (presumably module-level) scope.
    """
    # Load the sound files, keeping only the sample data of each.
    # snd and kick are only referenced by the disabled demos listed below.
    snd = wavread('trumpet.wav')[0]
    kick = wavread('kick.wav')[0]
    amb = wavread('amb.wav')[0]
    amb = amb * 0.8  # take a little gain off this sound file

    print(len(amb))

    # Other demos, disabled:
    #low_demo(snd, 10., 500.)
    #high_demo(snd, 10000., 10.)
    #allpass_demo(snd, 1000, -find_c(1000., fs), find_c(1000., fs), 1.0)
    #iir_comb_demo(kick, 100, 0.5, -0.5)

    # Time axis with a step of one sample period, then the sine modulator.
    duration = len(amb) / fs
    time_axis = arange(0.0, duration, 1.0 / fs)
    mod_freq = 1
    modulator = sin(2. * pi * mod_freq * time_axis)
    # Alternative modulator (linear ramp), disabled:
    #modulator = linspace(1, -1, len(amb))

    delayTime = 2.0
    width = 1.0

    # Alternative parameter sets, disabled:
    #var_allpass_demo(snd, delayTime / 1000., width / 1000., -find_c(8000, fs), find_c(8000, fs), 1.0, modulator)
    #var_allpass_demo(amb, delayTime / 1000., width / 1000., 0.5, -0.5, 0.0, modulator)

    # flanger
    var_allpass_demo(amb, delayTime, width, 0.7, 0.7, 0.7, modulator)
Beispiel #4
0
def noise_reduce_test():
	# Ad-hoc timing check: band-pass a recorded sample, run noise_reduce on it
	# together with a separate noise recording, and print the elapsed time.
	# Only the sample data (element 0 of wavread's result) is kept.
	sample = wavread('../../sounds/single-bloop-trimmed.wav')[0]
	noise = wavread('../../sounds/single-bloop-noise.wav')[0]
	# NOTE(review): 30000 and 50000 are presumably the band edges in Hz -- confirm
	sample = bandpass(sample,30000,50000)
	# Time only the noise_reduce call itself.
	t0 = time.time()
	sample = noise_reduce(sample,noise,NoiseReduceSettings())
	print 'noise filter in time:', round(time.time() - t0,2)
	'''
Beispiel #5
0
    def __init__(self, audio, mic_amount, trials, proc_number):
        """Load the source audio and set up receiver positions and buffers.

        :param audio: path of the wav file, passed to ``wavread``.
        :param mic_amount: stored as the private microphone count.
        :param trials: number of trials; sizes the position result arrays.
        :param proc_number: identifier stored on the instance.
        """
        logging.info('Starting.')
        self.proc_number = proc_number
        # Historical misspelling, kept so existing readers keep working.
        self.proc_numer = proc_number

        # Decode the file once and reuse both the samples and the rate
        # (it used to be read a second time just for the sample rate).
        samples, sample_rate = wavread(audio)[:2]

        # Flatten the per-frame rows and keep every second value (for a
        # two-channel file this is the first channel) as a column vector.
        frames = numpy.array([list(frame) for frame in samples])
        self.wave = frames.flatten()[::2].reshape(-1, 1)

        # Scale so the largest sample value becomes 0.8.  The builtin max is
        # kept on purpose: it makes self.scale a one-element array, as before.
        self.scale = 0.8 / max(self.wave)
        self.wave = numpy.multiply(self.scale, self.wave)

        self.sample = float(sample_rate)
        print('\nSampling rate used: ' + str(self.sample))

        self.trials = trials
        self.__microphone_amount = mic_amount

        # Filled in by generate_combinations below.
        self.subArrays_X = []
        self.subArrays_Y = []
        self.subArrays_Z = []
        self.element = [0]
        self.generate_combinations(X_dict, Y_dict, Z_dict, 4)

        # Use the first generated combination; exactly four receivers are
        # taken (an IndexError is raised if fewer are available).
        receivers = list(zip(self.subArrays_X[0],
                             self.subArrays_Y[0],
                             self.subArrays_Z[0]))
        self.X = [receivers[i][0] for i in range(4)]
        self.Y = [receivers[i][1] for i in range(4)]
        self.Z = [receivers[i][2] for i in range(4)]

        self.sensor_positions = numpy.column_stack((self.X, self.Y, self.Z))
        self.true_positions = numpy.zeros((self.trials, 3))
        self.estimated_positions = numpy.zeros((self.trials, 3))

        self.distances = []
        self.time_delays = []
        self.padding = []

        print('\nReceiver Locations:')
        for number, (rx, ry, rz) in enumerate(zip(self.X, self.Y, self.Z), 1):
            print('Receiver ' + str(number) + ': X: ' + str(rx) +
                  '        Y: ' + str(ry) + '     Z: ' + str(rz))
        print('\n')
        logging.info('Inited core.')
Beispiel #6
0
def cut_silence_in_sound(source, target, rmsTreshhold=-40, WndSize=128):
    """
    Cut the silence at the beginning and at the end of an audio file.

    Useful for normalizing the length of audio stimuli in an experiment.
    The default parameters were tested with normal speech.

    The signal is scanned in windows of ``WndSize`` samples that advance by
    half a window.  The cut point on each side is the position reached right
    after the first window whose level (``lin2db`` of its RMS) exceeds
    ``rmsTreshhold``.  If no window on a side is loud enough, nothing is
    trimmed on that side.

    source       : path of the source audio file
    target       : path of the output sound (written as 'pcm24')
    rmsTreshhold : level above which a window counts as sound
    WndSize      : analysis window length in samples

    Both cut points are printed in seconds, using the file's sample rate.
    """
    # Read the file once; the end scan works on a reversed view of it.
    x, fs, enc = wavread(str(source))

    beginning = _first_loud_offset(x, rmsTreshhold, WndSize)
    print(beginning / float(fs))

    final = _first_loud_offset(x[::-1], rmsTreshhold, WndSize)
    print((len(x) - final) / float(fs))

    # Write the sound source without the silences.
    wavwrite(x[beginning:len(x) - final], target, fs, enc='pcm24')


def _first_loud_offset(samples, threshold, window):
    """Return the offset reached after the first loud window, or 0 if none."""
    # Integer hop so the offsets stay valid slice indices.
    hop = max(1, window // 2)
    start = 0
    while start + window < len(samples):
        frame = samples[start:start + window]
        level = lin2db(np.sqrt(np.mean(np.absolute(frame) ** 2)))
        start += hop
        if level > threshold:
            return start
    return 0
Beispiel #7
0
def main():
    """
    Command-line entry point: run the given sound file through the reverb
    and write the wet/dry mix to the output file.
    """
    cli = argparse.ArgumentParser(description='Artificial Reverb')

    # Positional arguments: the sound file comes first, then the output path.
    cli.add_argument('soundfile', help='audio file to process', type=validInput)
    cli.add_argument('outfile', help='path to output file', type=validInput)

    # Optional float parameters: (short flag, long flag, default, help text).
    float_options = (
        ('-w', '--wetdry', 0.2, 'amount of wet signal in the mix'),
        ('-da', '--damping', 0.25, 'amount of high frequency damping'),
        ('-de', '--decay', 0.4, 'amount of attentuation applied to signal to make it decay'),
        ('-pd', '--predelay', 30, 'amount of time before starting reverb'),
        ('-b', '--bandwidth', 0.6, 'amount of high frequency attentuation on input'),
        ('-t', '--tankoffset', 0, 'amount of time (ms) to increase the last tank delay time'),
    )
    for short_flag, long_flag, default_value, help_text in float_options:
        cli.add_argument(short_flag, long_flag, default=default_value,
                         type=float, help=help_text)

    args = cli.parse_args()

    # Resolve the input against the current working directory.
    source_path = os.path.join(os.getcwd(), args.soundfile)

    # x is the input signal; an untouched copy is kept for the dry path.
    x, sampleRate, wavType = wavread(source_path)
    dry = x.copy()

    wet = reverbTest(x, sampleRate, args.damping, args.decay, args.predelay,
                     args.bandwidth, args.tankoffset)

    # Wet/dry mix, then write the result.
    mixed = dryWet(dry, wet, args.wetdry)
    wavwrite(transpose(mixed), args.outfile, sampleRate)
Beispiel #8
0
def wavread(path):
    """
    Read a wav file through the AUDIOLAB backend.

    Thin pass-through to ``AUDIOLAB.wavread``: its result (wav data, sample
    rate, encoding type) is returned unchanged.
    See pyaudiolab or scikits.audiolab for more information.
    """
    loaded = AUDIOLAB.wavread(path)
    return loaded
def computeFeaturesForFullSong(file_path, feature_list, pack_size):
    """
    Compute every requested full-song feature for one recording.

    The file is read once and split with ``chunks(raw_data, pack_size)``.
    Each name in ``feature_list`` is looked up on the ``features`` module,
    instantiated with those chunks, and its ``value`` is collected.

    :param file_path: path of the wav file to read.
    :param feature_list: names of feature classes found on ``features``.
    :param pack_size: chunk size handed to ``chunks``.
    :return: a list with one feature value per entry of ``feature_list``,
        in the same order.
    :raises ValueError: if a named feature class has ``requireFullSong``
        set to False.
    """
    raw_data, fs, enc = wavread(file_path)
    # NOTE(review): the same chunks object is handed to every feature; if
    # ``chunks`` returns a one-shot iterator only the first feature would see
    # data -- confirm it returns a reusable sequence.
    raw_chunks = chunks(raw_data, pack_size)

    # will hold the evaluated feature values
    feature_values = []
    for feature_name in feature_list:
        class_ = getattr(features, feature_name)
        if class_.requireFullSong is False:  # ensure full song
            # A bare string cannot be raised; use a real exception type.
            raise ValueError(
                "Every feature must be a full song feature: %s" % feature_name)
        feature_values.append(class_(raw_chunks).value)

    return feature_values
Beispiel #10
0
def sound_wav():
    """Plot the waveform of 'temp.wav' and save it as 'wave_form.png'.

    The first channel is plotted (a mono file is used as is).  Signals of
    500000 samples or more are decimated with an integer stride of
    ``len(signal) // 30000`` before plotting.  The x axis is in milliseconds.
    """
    clf()
    (snd, sampFreq, nBits) = audiolab.wavread('temp.wav')

    # Mono data is 1-D and has no channel axis to index.
    signal = snd if snd.ndim == 1 else snd[:, 0]

    if len(signal) < 500000:
        downsample_factor = 1
        wave_form = signal
    else:
        # Floor division keeps the stride an integer; a strided slice
        # replaces the element-by-element list concatenation.
        downsample_factor = len(signal) // 30000
        wave_form = signal[::downsample_factor]

    # Sample index -> seconds -> milliseconds.
    timeArray = arange(0, float(len(wave_form)), 1)
    timeArray = timeArray * downsample_factor / sampFreq
    timeArray = timeArray * 1000

    plot(timeArray, wave_form, color='k')
    ylabel('Amplitude')
    xlabel('Time (ms)')
    savefig('wave_form.png', bbox_inches=0)
    # show()
# setup('skream.wav')
# sound_wav()
# teardown()
Beispiel #11
0
 def __init__(self, filepath):
     """Read the wav file at ``filepath`` and record its name and length."""
     self.filepath = filepath

     # wavread yields (sample data, sample rate, format).
     decoded = wavread(filepath)
     (self.audio_array, self.sample_rate, self.format) = decoded

     self.name = os.path.basename(filepath)

     # Length = number of frames divided by the sample rate, as floats.
     frame_count = len(self.audio_array)
     self.length = float(frame_count) / float(self.sample_rate)
Beispiel #12
0
def get_RMS_over_time(audio_file, window_size = 1024, in_db = True):
	"""
	Compute the RMS of consecutive, non-overlapping windows of an audio file.

	parameters:
		audio_file 	: file to analyse; read as AIFF first, then as WAV if
					  aiffread raises ValueError
		window_size : number of samples per analysis window
		in_db 		: forwarded to get_rms_from_data

	returns : (time_tags, values), where time_tags holds the centre of each
		window divided by the sample rate and values holds the matching
		result of get_rms_from_data

	warning :
		this function only works for mono files
	"""
	from scikits.audiolab import wavread, aiffread

	try:
		sound_in, fs, enc = aiffread(audio_file)
	except ValueError:
		sound_in, fs, enc = wavread(audio_file)

	values = []
	time_tags = []
	# A window is analysed only while begin + window_size < len(sound_in),
	# so a trailing partial (or exactly final) window is skipped.
	for begin in range(0, len(sound_in) - window_size, window_size):
		data = sound_in[begin : begin + window_size]
		values.append(get_rms_from_data(data, in_db = in_db))
		# float() replaces the np.float alias, which NumPy has removed.
		time_tags.append((begin + window_size // 2) / float(fs))

	return time_tags, values
def wavread(path):
    """
    Read a wav file through the AUDIOLAB backend.

    Thin pass-through to ``AUDIOLAB.wavread``: its result (wav data, sample
    rate, encoding type) is returned unchanged.
    See pyaudiolab or scikits.audiolab for more information.
    """
    loaded = AUDIOLAB.wavread(path)
    return loaded
Beispiel #14
0
def loadFiles(path):
	"""reads wave files from path and returns dictionary with fields:
        - "name" - name of file
        - "nameGender" - a sex readed from filename
        - "signal" - numpy array with sound signal readed from file
        - "sampleRate" - sample rate of the file

        and dictionary that contains numbers of male and female voices
	"""
	print "reading files..."

	files = [ f for f in listdir(path) if isfile(join(path,f)) and splitext(f)[1] == ".wav" ]

	samples = []
	maleCount = 0
	femaleCount = 0
	for f in files:
		p = path + '/' + f

		print "...", f
		data,rate,encoding=wavread(p)
		sig=[mean(d) for d in data]    
		samples.append({'name': f, 'nameGender': f[-5:-4], 'signal': sig, 'sampleRate': rate})
        
		if f[-5:-4] == "M":
			maleCount += 1
		else:
			femaleCount += 1
    
	counters = {"maleCount":maleCount, "femaleCount":femaleCount}
	return samples, counters
Beispiel #15
0
 def __init__(self, filepath=None, units=None):
     """
        Build a mosaic from a wav ``filepath``, from a list of Unit
        objects, or as an empty container when neither is given.

        When audio is available (an existing file, or units),
        ``self._calculate_metadata`` is called to populate the ``Mosaic``
        attributes. A ``filepath`` that is not an existing file yields an
        empty container that still remembers ``filepath`` and ``name``;
        ``units`` is ignored whenever ``filepath`` is given.

     """

     self.units = []

     if filepath:
         self.filepath = filepath
         self.name = os.path.basename(filepath)
         if os.path.isfile(self.filepath):
             decoded = wavread(filepath)
             (self.data, self.sample_rate, type_format) = decoded
             self._calculate_metadata()
             return
     elif units:
         self.units = units
         self.data = self._make_data(units)
         self.sample_rate = 44100
         self._calculate_metadata()
         return

     # No audio to load: fall back to the empty-container defaults.
     self.samples = 0
     self.sample_rate = 44100
     self.length = 0
     self.data = None
Beispiel #16
0
    def _analyse(self, filepath):
        """Compute per-frame onset-detection descriptors for an audio file.

        The file is read, reduced with ``to_mono`` and cast to float32. It is
        then cut into frames of ``self.frame_size`` with hop
        ``self.hop_size``; for every frame the 'hfc' and 'complex' onset
        detection functions, the RMS and the spectral flux are added to a
        pool under the keys 'hfc', 'complex', 'rms' and 'flux'.

        :param filepath: path of the wav file to analyse.
        :return: tuple ``(pool, audio)``.
        """
        audio = to_mono(wavread(filepath)[0])
        audio = audio.astype('float32')

        w = Windowing(type = 'hann')
        fft = FFT() # this gives us a complex FFT
        c2p = CartesianToPolar() # and this turns it into a pair (magnitude, phase)
        hfc_detect = OnsetDetection(method = 'hfc')
        complex_detect = OnsetDetection(method = 'complex')
        rms_detect = RMS()
        spec = Spectrum()
        flux = Flux()
        pool = Pool()

        print('Computing onset detection functions...')
        for frame in FrameGenerator(audio, frameSize = self.frame_size,
                                    hopSize = self.hop_size):
            # Window the frame once and feed both the FFT and the spectrum
            # from it (it used to be windowed twice per frame).
            windowed = w(frame)
            mag, phase = c2p(fft(windowed))
            f = flux(spec(windowed))
            pool.add('hfc', hfc_detect(mag, phase))
            pool.add('complex', complex_detect(mag, phase))
            pool.add('rms', rms_detect(frame))
            pool.add('flux', f)

        return pool, audio
    def estimate_f0s(self, audio_path):
        """Estimate fundamental frequencies, and their notes, for a wav file.

        :param audio_path: path of the audio file; must exist.
        :return: tuple ``(f0_estimations, notes)`` where ``notes`` holds, for
            every entry of ``f0_estimations``, the list of its estimates
            mapped through ``self._freq_to_note``.
        :raises ValueError: if ``audio_path`` does not exist.
        """
        if not os.path.exists(audio_path):
            raise ValueError('Invalid audio path')

        samples, fs, _ = wavread(audio_path)

        # Average the channels when the data has a channel axis.
        if samples.ndim > 1:
            _, n_channels = samples.shape
            samples = samples.sum(axis=1)/n_channels

        spectrogram = self._stft(samples, fs)

        # Section 2.1: spectral whitening, to suppress timbral information.
        whitened = self._spectral_whitening(spectrogram, fs)

        # Iterative estimation of the fundamental periods.
        f0_estimations = self._iterative_est(whitened, fs)

        # Map each frequency estimate to its note.
        notes = [[self._freq_to_note(freq) for freq in frame_ests]
                 for frame_ests in f0_estimations]

        return f0_estimations, notes
Beispiel #18
0
def sibilant_detector(filename):
    """
    Detect where in ``filename`` the energy around a target frequency peaks.

    The algorithm works as follows:
    1- Compute the spectrogram.
    2- Compute a gaussian weighting curve with ``weighting_vector`` (mean
       frequency ``mu``, width ``sigma``).
    3- Multiply every spectrum by the gaussian to weight it.
    4- Sum each weighted spectrum and normalize by the largest sum.
    5- The peaks of the resulting curve mark the moments where the energy in
       the weighted band is highest.

    Each intermediate result is plotted. The excerpt of +/- 5 analysis
    windows around the strongest peak is written to 'test.wav' at the sample
    rate of the input file.
    """
    sound_data, fs, enc = wavread(filename)

    # Gaussian coefs
    sigma = 5
    mu = 10000  # mean frequency
    NFFT = 512
    noverlap = 128
    # Consecutive analysis windows start this many samples apart.
    hop = NFFT - noverlap

    # Spectrogram
    Pxx, freqs, bins, im = specgram(sound_data, NFFT=NFFT, noverlap=noverlap,
                                    Fs=fs)
    show()

    nb_of_fft_coefs = Pxx.shape[0]

    # Compute the gaussian vector and plot it
    weights = weighting_vector(nb_of_fft_coefs, sigma, mu, fs)
    f_wweights = np.linspace(0, fs / 2.0, len(weights), endpoint=True)
    plot(f_wweights, weights)
    show()

    # Weight every spectrogram column at once and show the result
    weighted_ffts = Pxx * np.asarray(weights).reshape(-1, 1)
    imshow(weighted_ffts, interpolation='nearest', aspect='auto')
    show()

    # One descriptor value per window, normalized to a maximum of 1
    sibilant_desc = weighted_ffts.sum(axis=0)
    peak = sibilant_desc.max()
    if peak > 0:  # leave an all-zero curve untouched instead of dividing by 0
        sibilant_desc = sibilant_desc / peak
    plot(sibilant_desc)
    show()

    # Export the audio around the strongest window. Window indices are
    # converted to samples with the hop, and the start is clamped so a peak
    # near the beginning does not produce a negative slice index.
    max_index = int(np.argmax(sibilant_desc))
    start = max(0, (max_index - 5) * hop)
    stop = (max_index + 5) * hop
    wavwrite(sound_data[start:stop], 'test.wav', fs=fs)
Beispiel #19
0
    def test_bad_wavread(self):
        """wavread must reject a file that is not in wav format."""
        # Random noise is written in AIFF format to a temporary file, which
        # wavread is then expected to refuse.
        rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
        try:
            nbuff = 22050
            noise = 0.1 * N.random.randn(nbuff)

            # Write the noise as 16-bit AIFF.
            writer = Sndfile(cfilename, 'w', audio_format('aiff', 'pcm16'),
                             1, nbuff)
            writer.write_frames(noise)
            writer.close()

            # Read it back through Sndfile itself.
            reader = Sndfile(cfilename, 'r')
            reader.read_frames(nbuff)
            reader.close()

            try:
                wavread(cfilename)[0]
                raise Exception(
                    "wavread on non wav file succeded, expected to fail")
            except ValueError:
                # Being rejected is the expected outcome.
                pass

        finally:
            close_tmp_file(rfd, cfilename)
    def test_bad_wavread(self):
        """wavread must reject a file that is not in wav format."""
        # Random noise is written in AIFF format to a temporary file, which
        # wavread is then expected to refuse.
        rfd, fd, cfilename = open_tmp_file('pysndfiletest.wav')
        try:
            nbuff = 22050
            noise = 0.1 * N.random.randn(nbuff)

            # Write the noise as 16-bit AIFF.
            writer = Sndfile(cfilename, 'w', audio_format('aiff', 'pcm16'),
                             1, nbuff)
            writer.write_frames(noise)
            writer.close()

            # Read it back through Sndfile itself.
            reader = Sndfile(cfilename, 'r')
            reader.read_frames(nbuff)
            reader.close()

            try:
                wavread(cfilename)[0]
                raise Exception("wavread on non wav file succeded, expected to fail")
            except ValueError:
                # Being rejected is the expected outcome.
                pass

        finally:
            close_tmp_file(rfd, cfilename)
Beispiel #21
0
def feature_extraction(wav_fd, fe_fd):
    """Compute a 64-band mel magnitude spectrogram for every wav in a folder.

    Each ``*.wav`` file in ``wav_fd`` (processed in sorted order) is averaged
    over its channels when it has two dimensions, turned into a magnitude
    spectrogram with a Hamming window of ``n_fft`` samples and no overlap,
    projected onto the mel filterbank and pickled to
    ``<fe_fd>/<name without the last 4 characters>.f``.

    ``n_fft`` is read from the enclosing scope. The mel filterbank is built
    once and cached in the module-level name ``melW``.

    :param wav_fd: folder containing the wav files.
    :param fe_fd: folder that receives the pickled feature files.
    """
    global melW

    names = sorted(na for na in os.listdir(wav_fd) if na.endswith('.wav'))
    # The analysis window does not depend on the file; build it once.
    ham_win = np.hamming(n_fft)

    for na in names:
        print(na)
        wav, fs, enc = wavread(os.path.join(wav_fd, na))
        if wav.ndim == 2:
            wav = np.mean(wav, axis=-1)

        f, t, x = signal.spectrogram(x=wav,
                                     window=ham_win,
                                     nperseg=n_fft,
                                     noverlap=0,
                                     detrend=False,
                                     return_onesided=True,
                                     mode='magnitude')
        x = x.T

        # NOTE(review): the filterbank is built with the sample rate of the
        # first file only and fmax is fixed at 22100 -- confirm that every
        # input shares one sample rate.
        if globals().get('melW') is None:
            melW = librosa.filters.mel(sr=fs,
                                       n_fft=n_fft,
                                       n_mels=64,
                                       fmin=0.,
                                       fmax=22100)
        x = np.dot(x, melW.T)

        out_path = os.path.join(fe_fd, na[0:-4] + '.f')
        # Close the output file deterministically instead of leaking it.
        with open(out_path, 'wb') as out_file:
            cPickle.dump(x, out_file, protocol=cPickle.HIGHEST_PROTOCOL)
Beispiel #22
0
    def load_data(self):
        """
        Read the wav file at ``self.file_path`` into the audio attributes.

        Sets ``audio_data``, ``audio_sample_rate``, ``audio_format`` and
        ``audio_length`` (number of frames divided by the sample rate).

        This function works only for mono wav files!

        Raises AttributeError if the file path is None and OSError if it is
        not an existing file.
        """
        self._log("Loading audio data")

        # Make sure there is a readable file before touching it.
        path = self.file_path
        if path is None:
            raise AttributeError("File path is None")
        if not os.path.isfile(path):
            self._log(["File '%s' cannot be read", path], Logger.CRITICAL)
            raise OSError("File cannot be read")

        self._log("Loading wav file...")
        samples, rate, fmt = wavread(path)
        self.audio_data = samples
        self.audio_sample_rate = rate
        self.audio_format = fmt
        self.audio_length = float(len(samples)) / rate
        for template, value in (("Sample length: %f", self.audio_length),
                                ("Sample rate:   %f", self.audio_sample_rate),
                                ("Audio format:  %s", self.audio_format)):
            self._log([template, value])
        self._log("Loading wav file... done")
    def estimate_f0s(self, audio_path):
        """Estimate fundamental frequencies, and their notes, for a wav file.

        :param audio_path: path of the audio file; must exist.
        :return: tuple ``(f0_estimations, notes)`` where ``notes`` holds, for
            every entry of ``f0_estimations``, the list of its estimates
            mapped through ``self._freq_to_note``.
        :raises ValueError: if ``audio_path`` does not exist.
        """
        if not os.path.exists(audio_path):
            raise ValueError('Invalid audio path')

        samples, fs, _ = wavread(audio_path)

        # Average the channels when the data has a channel axis.
        if samples.ndim > 1:
            _, n_channels = samples.shape
            samples = samples.sum(axis=1) / n_channels

        spectrogram = self._stft(samples, fs)

        # Section 2.1: spectral whitening, to suppress timbral information.
        whitened = self._spectral_whitening(spectrogram, fs)

        # Iterative estimation of the fundamental periods.
        f0_estimations = self._iterative_est(whitened, fs)

        # Map each frequency estimate to its note.
        notes = [[self._freq_to_note(freq) for freq in frame_ests]
                 for frame_ests in f0_estimations]

        return f0_estimations, notes
Beispiel #24
0
 def open_wav_audiolab(self, filename):
     #http://scikits.appspot.com/audiolab
     from scikits.audiolab import wavread
     results, sample_frequency,encoding = wavread(filename)
     self.sample_rate = sample_frequency
     print 'Sample Rate is ', sample_frequency
     return results, self.sample_rate
Beispiel #25
0
def raw_specs(filestring):
    """Save a spectrogram PNG for every wav file found in temp/<name>/.

    ``<name>`` is the base name of ``filestring`` without its extension.
    Nothing is done unless ``filestring`` has a ".wav" extension (any case).
    Directories and empty files in the folder are reported with
    "not a file." and skipped; other non-wav entries are skipped silently.
    Images are written to temp/<name>/Spec/ under the wav file's name with
    the extension replaced by ".png".

    :param filestring: path or name of the original wav file.
    """
    from scikits.audiolab import wavread
    import pylab
    import matplotlib.pyplot as plt
    import os

    def is_wav(name):
        # Test the real extension rather than searching for the substring
        # 'wav' anywhere in the name.
        return os.path.splitext(name)[1].lower() == '.wav'

    if not is_wav(filestring):
        return

    filename = os.path.splitext(os.path.basename(filestring))[0]
    maindir = "temp/" + filename + "/"
    specdir = os.path.join(maindir, "Spec")

    for fnamefiles in os.listdir(maindir):
        path = maindir + fnamefiles
        if os.path.isdir(path) or os.stat(path).st_size == 0:
            print("not a file.")
            continue
        if not is_wav(fnamefiles):
            continue

        if not os.path.exists(specdir):
            os.makedirs(specdir)

        signal, fs, enc = wavread(path)

        NFFT = 256     # the length of the windowing segments
        # NOTE(review): the spectrogram uses a fixed sampling frequency of
        # 300 and ignores the rate read from the file -- confirm intent.
        Fs = int(300)

        pylab.figure(num=None, figsize=(4, 8), frameon=False)
        pylab.specgram(signal, NFFT=NFFT, Fs=Fs, noverlap=int(NFFT-1),
                       cmap=pylab.cm.gist_heat)

        # Swap only the extension; str.replace would also rewrite a 'wav'
        # occurring elsewhere in the file name.
        figname = os.path.join(specdir,
                               os.path.splitext(fnamefiles)[0] + '.png')
        pylab.savefig(figname)
        plt.close('all')

    print("Spectrogramas gerados.")
def convert_wav(File, ofile):
    """Resample a wav file and write it as a 16-bit FLAC file.

    :param File: path of the wav file to read.
    :param ofile: path of the FLAC file to create.
    :return: the ``audiolab.Sndfile`` handle the frames were written to. It
        is returned open, as before, with its buffers flushed to disk.

    NOTE(review): ``fr`` (the target sample rate) is not defined in this
    function; it is presumably a module-level constant (the original comment
    mentions 16000) -- confirm.
    """
    import scikits.audiolab as audiolab
    from scikits.samplerate import resample

    # Read the samples and the original sample rate.
    Signal, fs = audiolab.wavread(File)[:2]
    # Resample to the target rate with the best-quality sinc converter.
    Signal = resample(Signal, fr/float(fs), 'sinc_best')

    # Output format: FLAC container, 16-bit PCM, one channel.
    fmt = audiolab.Format('flac', 'pcm16')
    nchannels = 1

    # Write to the caller-supplied path. The ``ofile`` argument used to be
    # ignored in favour of the name FileNameTmp (not defined here) and was
    # then overwritten by the file handle.
    out = audiolab.Sndfile(ofile, 'w', fmt, nchannels, fr)
    out.write_frames(Signal)
    # Flush so the file content is complete even if the caller never closes.
    out.sync()
    return out
Beispiel #27
0
def process(file):
    """Write a click track holding the onset strength at every onset.

    The file is read, its onset strength envelope and onsets are computed
    with librosa, and a silent buffer as long as the input receives, at each
    onset position, the envelope value of that onset's frame. The result is
    written next to the input as ``<file without last 4 chars>_proc.wav``
    with the input's sample rate and encoding.

    :param file: path of the wav file to process.
    """
    # read in the file
    f, sr, enc = wavread(file)

    # onset strength envelope (one value per analysis frame)
    env = librosa.onset.onset_strength(y=f, sr=sr)

    # onsets as frame indices, and the same onsets as sample positions
    onsets = librosa.onset.onset_detect(y=f, sr=sr)
    onset_samps = librosa.frames_to_samples(onsets)

    # empty audio buffer (result)
    result = np.zeros(len(f))

    # The frame index of an onset is also its index into the envelope, so no
    # search is needed. Onsets that fall outside the buffer or the envelope
    # are dropped, and a file without onsets yields an all-zero output
    # instead of an IndexError.
    valid = (onset_samps < len(f)) & (onsets < len(env))
    result[onset_samps[valid]] = env[onsets[valid]]

    # write the result
    wavwrite(result, file[:-4] + '_proc.wav', sr, enc)
    return
Beispiel #28
0
def gather_training_data(path=SAMPLE_PATH):
    """Load every instrument sample found under ``path``.

    Each immediate sub-directory of ``path`` is treated as one instrument.
    Every ``.wav`` file (extension compared case-insensitively) inside it is
    read, and its pitch is parsed from the file name: a letter A-G with an
    optional 's' or 'b', followed by the octave digits.

    :param path: root directory holding one sub-directory per instrument.
    :return: dict mapping instrument name to a list of
        ``(data, chroma, octave)`` tuples, where ``chroma`` has been mapped
        through ``canonical_chroma`` and ``octave`` is the matched digit
        string.
    :raises AssertionError: if a sample file name contains no pitch.
    """
    # Immediate sub-directories of `path`.
    instr_names = next(os.walk(path))[1]
    pitch_pattern = re.compile(r"([A-G][sb]?)(\d+)")

    # NOTE: Could potentially make subdirs for different qualities

    samples = dict()
    for instr in instr_names:
        # Build the directory from the `path` argument (not the module
        # default) and with the platform's separator.
        instr_sample_dir = os.path.join(path, instr)
        instr_samples = []
        for samp in os.listdir(instr_sample_dir):
            samp_path = os.path.join(instr_sample_dir, samp)
            if not os.path.isfile(samp_path):
                continue
            if os.path.splitext(samp)[1].lower() != ".wav":
                continue

            data, fs, enc = skal.wavread(samp_path)

            matches = pitch_pattern.search(samp)
            if matches is None:
                # Explicit raise: a bare assert disappears under python -O.
                raise AssertionError(
                    "no pitch name in sample file name: %r" % (samp,))

            chroma, octave = matches.groups()
            chroma = canonical_chroma[chroma]

            # NOTE: It's quite possible that using a dictionary
            #       instead of a list will be helpful, but we'll
            #       cross that bridge when we get to it
            instr_samples.append((data, chroma, octave))

        samples[instr] = instr_samples

    return samples
def normalize_target_audio(input_file='moviehires_endpos_beta02.imatsh.wav', 
                           sources_expr='/home/mkc/Music/GoldbergVariations/*48_1.wav', write_me=False, amp_factor=0.5, proc_audio=True):
    """
    Per-variation normalization of concatenated imatsh file using individual sources as locators
    Assumes that the input_file and the sources have the same sample rate
    inputs:
        input_file  - the file to be processed (locally normalized)
        sources_expr- glob pattern matching the source files
        write_me    - write output files when true [False]
        amp_factor  - amplitude change factor (proportion of full scale normalization) [0.5]
        proc_audio  - whether to process target audio using source audio info [True]
    outputs:
        sample_locators - cumulative sample locators, one boundary per variation
        audio_summaries - [peak, rms] rows: one per source file, followed
                          (when proc_audio is true) by one for the matching
                          segment of input_file
    output files (when write_me is true):
        {input_file_stem}_{NN}.{input_ext}, one per variation
    raises:
        ValueError if a source file and input_file differ in sample rate
    """
    # Compute peak and rms per source file
    flist = sorted(glob.glob(sources_expr))
    sample_locators = [0]
    audio_summaries = []
    ext_pos = input_file.rindex('.')
    outfile_stem, ext = input_file[:ext_pos], input_file[ext_pos+1:]
    for i, f in enumerate(flist):
        x, sr, fmt = skaud.wavread(f)
        print('%s %s %s' % (f, sr, fmt))
        if len(x.shape) > 1:
            x = x[:, 0]  # Take left-channel only
        sample_locators.append(len(x))
        audio_summaries.append([np.max(np.abs(x)), np.sqrt(np.mean(x**2))])
        if not proc_audio:
            continue

        # Absolute frame range of this variation inside input_file. Both
        # bounds are cumulative positions; passing the segment length as
        # ``last`` only worked for the first variation.
        bounds = np.cumsum(sample_locators)
        first, last = int(bounds[-2]), int(bounds[-1])
        y, sr_y, fmt_y = skaud.wavread(input_file, first=first, last=last)
        if sr != sr_y:
            raise ValueError("input and source sample rates don't match: %d,%d"%(sr,sr_y))

        # Summarize the left channel (or the only channel of a mono target).
        y_left = y[:, 0] if len(y.shape) > 1 else y
        max_val = np.max(np.abs(y_left))
        rms_val = np.sqrt(np.mean(y_left**2))
        audio_summaries.append([max_val, rms_val])

        # Blend between unity gain and full-scale peak normalization.
        norm_cf = amp_factor / max_val + (1 - amp_factor)
        outfile = outfile_stem+'_%02d.%s'%(i+1,ext)
        max_amp_val = norm_cf * max_val
        rms_amp_val = norm_cf * rms_val
        print('%s: nrm=%05.2fdB, peak=%05.2fdB, *peak=%05.2fdB, rms=%05.2fdB, *rms=%05.2fdB'%(
            outfile, dB(norm_cf), dB(max_val), dB(max_amp_val), dB(rms_val), dB(rms_amp_val)))
        if write_me:
            skaud.wavwrite(norm_cf*y, outfile, sr, fmt)
    return np.cumsum(sample_locators), np.array(audio_summaries)
Beispiel #30
0
def train_codebook(basedirectory,
                   spectral,
                   desired_fs,
                   clfs,
                   n_samples):
    """Train the codebooks.

    Arguments:
    :param basedirectory: root directory of the audio corpus; every
      ``*.wav`` file found by ``rglob`` below it is a candidate.
    :param spectral:
      Spectral feature extraction.
      Object should be picklable and implement the
      ``Spectral`` abc; i.e. provide a ``transform`` method.
    :param desired_fs:
      sample rate every file is resampled to before feature extraction.
    :param clfs:
      list of clusterers. valid clusterers have a ``fit`` method
      and a ``predict`` method. optionally, for soft vq, also implement
      a ``predict_proba`` method.
    :param n_samples:
      number of spectral frames to sample from the audio corpus.
    :returns:
      a list of Codebook objects, one per entry of ``clfs``; the i-th
      codebook is trained on the columns produced by the i-th output of
      ``spectral.transform``.
    :raises ValueError: if no wav file was found under ``basedirectory``.
    """
    wavs = list(rglob(basedirectory, '*.wav'))
    np.random.shuffle(wavs)

    inds = None   # column boundaries of each spectral stream inside X
    idx = 0       # number of frames collected so far
    chunks = []   # per-file frame matrices, stacked once at the end
    for i, wav in enumerate(wavs):
        if i % 10 == 0 and i > 0:
            print('samples: {3}/{4}; loading file: {0} ({1}/{2})'.format(
                wavs[i],
                i+1,
                len(wavs),
                idx,
                n_samples
            ))
        sig, fs, _ = audiolab.wavread(wav)
        start, stop = trim_silence(sig, fs)
        # Force float division: with two integer rates Python 2 would
        # otherwise truncate the resampling ratio (usually to 0).
        ratio = float(desired_fs) / fs
        specs = spectral.transform(samplerate.resample(sig[start:stop],
                                                       ratio,
                                                       'sinc_best'))
        if inds is None:
            inds = [0] + list(np.cumsum([s.shape[1] for s in specs]))
        spec = np.hstack(specs)
        # Truncate the last file so that at most n_samples frames are kept.
        if idx + spec.shape[0] >= n_samples:
            spec = spec[:n_samples - idx, :]
        chunks.append(spec)
        idx += spec.shape[0]
        if idx >= n_samples:
            break

    if not chunks:
        raise ValueError(
            'no wav files found under {0!r}'.format(basedirectory))
    # Stack once instead of re-copying the growing matrix for every file.
    X = np.vstack(chunks)

    cdbs = [Codebook(clf) for clf in clfs]
    for i, cdb in enumerate(cdbs):
        cdb.train(X[:, inds[i]:inds[i+1]])
    return cdbs
Beispiel #31
0
def wav_to_aif(source, target):
    """
    Read the audio file ``source`` and write its samples to ``target``
    with 24-bit PCM encoding.

    source : path of the input audio file (converted with ``str``)
    target : path of the output audio file
    """
    # Read from the caller-supplied path, not from the ``file`` builtin.
    x, fs, _ = wavread(str(source))
    # NOTE(review): the container format is whatever ``wavwrite`` produces;
    # presumably WAV regardless of the extension of ``target`` -- confirm.
    wavwrite(x, target, fs, enc='pcm24')
Beispiel #32
0
def get_sound_duration(file):
	"""
	Return the duration of the audio file ``file`` in seconds, computed
	as the number of frames divided by the sample rate.
	"""
	from scikits.audiolab import wavread
	frames, rate, _encoding = wavread(file)

	duration = len(frames) / float(rate)
	return duration
Beispiel #33
0
 def set_filepath(self, path):
     """
         Point this object at the wav file ``path``: remember the path,
         load the file with ``wavread`` into ``data``, ``sample_rate``
         and ``format`` (replacing the current values), then call
         ``recalculate``.
     """
     self.filepath = path
     loaded = wavread(path)
     self.data, self.sample_rate, self.format = loaded
     self.recalculate()
Beispiel #34
0
def analyzeWAV(inputFile):
    """
    Read the .wav file ``inputFile`` with ``audiolab.wavread``.

    Returns a two-element list: [audio data, sampling rate].
    The third value returned by ``wavread`` is discarded.
    """
    samples, rate, _unused = audiolab.wavread(inputFile)
    return [samples, rate]
def loadSignal(fileName):
	"""
	Read the wav file ``fileName`` with ``al.wavread``.

	Returns ``(x, Fs)`` -- the sample data and the sample rate -- or
	``None`` (after printing a message) when the read raises IOError.
	"""
	try:
		x, Fs, encFmt = al.wavread(fileName)
	except IOError:
		# Report the path that was actually requested; the previous
		# message referenced a name that is not defined in this function.
		print('Could not import file "%s"' % fileName)
		return None

	return (x, Fs)
Beispiel #36
0
def generateMfcc(wavFile):
    """
    Compute cepstral coefficients for ``wavFile``.

    The file is first passed through ``filtering(wavFile, 2800, 3400)``
    (presumably band limits in Hz -- confirm against ``filtering``); the
    resulting file is read back and fed to ``mfcc`` with the window and
    FFT size both set to ``getFrameSize`` of the filtered file and 13
    coefficients requested. Only the first of the three values returned
    by ``mfcc`` is returned.
    """
    filtered_path = filtering(wavFile, 2800, 3400)
    samples, rate, _enc = wavread(filtered_path)
    frame_len = getFrameSize(filtered_path)

    mfcc_options = dict(nwin=frame_len, nfft=frame_len, fs=rate, nceps=13)
    ceps, _mspec, _spec = mfcc(samples, **mfcc_options)
    return ceps
Beispiel #37
0
def wavread(filename):
    """
    wav, fs, nbits = wavread(filename)

    Thin wrapper that forwards FILENAME to ``audiolab.wavread`` and
    returns its result unchanged. WAV is the sample data and FS the
    sampling rate.
    NOTE(review): the third value is called NBITS here; audiolab
    presumably reports it as an encoding description rather than a
    numeric bit depth -- confirm.
    """
    contents = audiolab.wavread(filename)
    return contents
Beispiel #38
0
def envelope(file, attack=1, release=10):
    """
    Run an ``Envelope`` algorithm over the samples of the wav file
    ``file`` and return the result flattened to one dimension.

    ``attack`` and ``release`` are passed to the algorithm as
    ``attackTime`` and ``releaseTime``.
    """
    samples, sr, enc = wavread(file)

    follower = Envelope()
    follower.configure(attackTime=attack, releaseTime=release)

    # The result is only returned, never written back to disk.
    return follower(essentia.array(samples)).reshape(-1)
Beispiel #39
0
def get_rms_from_wav(audio_file):
    """
    Read ``audio_file`` with ``wavread`` and return the result of
    ``get_rms_from_data`` applied to its sample data.
    """
    from scikits.audiolab import wavread

    samples, _rate, _encoding = wavread(audio_file)
    rms = get_rms_from_data(samples)
    return rms
def main(args):
    """
    Split the spectrum of an excerpt of a song into six frequency bands
    and save a bar plot of the first band.

    args[0] is the path of a .wav file. Each frame must have at least two
    channels, because the first two are summed. The plot is written to
    ``graphs/<file name>_graph.png``.
    """
    # Load up the song (must be .wav) into memory
    amp_data, fs, enc = wavread(args[0])

    # analyze 5 seconds from the middle of the song
    # NOTE(review): 44100 is hard-coded; the rate returned by wavread (fs)
    # is not used -- confirm all inputs are 44.1 kHz.
    # Floor division keeps the slice bounds integers under true division.
    middle = len(amp_data) // 2
    five_seconds = [item[0] + item[1] for item in amp_data[middle:middle + 44100 * 5]]

    # compute the FFT of the 5 second portion
    N = len(five_seconds)
    song_fft = np.fft.fft(five_seconds)[0:N // 2] # cut off repeated FFT data

    # FFT bin index of each band edge; the 6400 Hz index is computed but
    # not used by any band below.
    n_bins = len(song_fft)
    cutoffs = [indexOfFFTFreq(hz, n_bins)
               for hz in (200, 400, 800, 1600, 3200, 6400)]
    edges = [0] + cutoffs[:5]

    bands = [song_fft[lo:hi] for lo, hi in zip(edges[:-1], edges[1:])]
    # The last band runs from 3200 Hz up to (excluding) the final bin.
    bands.append(song_fft[cutoffs[4]:-1])

    # NOTE(review): each band is a numpy array, so "+" adds the reversed
    # band element by element; it does not append a mirrored copy. Confirm
    # which of the two was intended.
    bands = [band + list(reversed(band)) for band in bands]

    # Inverse transform of every band; currently not used further.
    fft_bands = [np.fft.ifft(band) for band in bands]

    fig, axs = plt.subplots(nrows=2, ncols=1)
    x = range(0, len(bands[0]))
    y = bands[0]
    axs[0].bar(x, y, facecolor='b', alpha=0.5, linewidth=1, width=1)
    axs[0].set_ylabel('')
    axs[0].set_xlabel('')

    plt.savefig("graphs/" + args[0].split('/')[-1] + "_graph.png")
Beispiel #41
0
def _first_loud_offset(samples, threshold, window):
    """Return the scan position just after the first window whose level exceeds ``threshold``, or 0.0 if none does."""
    index = 0
    while index + window < len(samples):
        # The position advances in half-window (possibly fractional) steps,
        # so it is truncated to an integer before slicing.
        chunk = samples[int(index):int(index + window)]
        level = lin2db(np.sqrt(np.mean(np.absolute(chunk)**2)))
        index = 0.5 * window + index
        if level > threshold:
            return index
    return 0.0


def get_sound_without_silence(source, rmsTreshhold=-40, WndSize=128):
    """
    Locate the non-silent part of an audio file.

    source       : source audio file (path, converted with ``str``)
    rmsTreshhold : level, on the scale returned by ``lin2db``, that the
                   RMS of a window must exceed to count as sound
    WndSize      : window length in samples; windows overlap by half

    Returns ``(beginning, end)``: the time tags, in seconds, of the
    beginning and the end of the audio in the file. The file is scanned
    forwards for the beginning and backwards for the end. When no window
    exceeds the threshold, nothing is trimmed and ``(0.0, duration)`` is
    returned.
    """
    x, fs, enc = wavread(str(source))

    beginning = _first_loud_offset(x, rmsTreshhold, WndSize)
    # Scanning the reversed signal gives the distance of the last loud
    # window from the end of the file.
    final = _first_loud_offset(x[::-1], rmsTreshhold, WndSize)

    end = len(x) - final
    return beginning / fs, end / fs
Beispiel #42
0
    def parse_audio_w(self):
        """
        Read ``self.filename`` with ``wavread`` and return only the
        sample data; the sample rate is read but discarded.
        """
        samples, _rate = wavread(self.filename)[:2]
        return samples
Beispiel #43
0
def fastICA(mix_file, jamming_file):
    """
    Combine two recordings into a two-channel file, run ``fastica`` on
    it and write part of the result to ``sources.wav``.

    mix_file     : path of the first recording; its sample rate and
                   encoding are used for the intermediate file
    jamming_file : path of the second recording

    Writes ``mixed.wav`` and ``sources.wav`` in the current directory.
    """
    mix_sig, mix_fs, mix_enc = wavread(mix_file)
    jam_sig, _jam_fs, _jam_enc = wavread(jamming_file)
    mix_sig, jam_sig = chop_sig(mix_sig, jam_sig)

    # One signal per column, then round-trip through a file on disk.
    stereo = array([mix_sig, jam_sig]).T
    wavwrite(stereo, "mixed.wav", mix_fs, mix_enc)
    recording, fs, enc = wavread("mixed.wav")

    # Perform FastICA algorithm on the two channels
    sources = fastica(recording)

    # Only the first element of every item in the result is kept.
    # NOTE(review): no level normalization is performed here -- confirm
    # that taking element 0 is what was intended.
    first_entries = [component[0] for component in sources]

    # Write back to a file
    wavwrite(array(first_entries), "sources.wav", fs, enc)
def main(args):
    """
    Plot the lowest of six frequency bands taken from the middle of a song.

    args[0] is the path of a .wav file whose frames have at least two
    channels (the first two are summed). The figure is saved as
    ``graphs/<file name>_graph.png``.
    """
    # Load up the song (must be .wav) into memory
    amp_data, fs, enc = wavread(args[0])

    # analyze 5 seconds from the middle of the song
    # NOTE(review): the excerpt length assumes 44100 samples per second;
    # the rate reported by wavread is ignored -- confirm.
    # "//" keeps the index an integer even when "/" is true division.
    middle = len(amp_data) // 2
    five_seconds = [
        item[0] + item[1] for item in amp_data[middle:middle + 44100 * 5]
    ]

    # compute the FFT of the 5 second portion
    N = len(five_seconds)
    song_fft = np.fft.fft(five_seconds)[0:N // 2]  # cut off repeated FFT data

    # Band edges as FFT bin indices. The 6400 Hz edge is looked up like
    # the others but no band below uses it.
    size = len(song_fft)
    hz_0 = 0
    hz_200 = indexOfFFTFreq(200, size)  #200 HZ
    hz_400 = indexOfFFTFreq(400, size)  #400 HZ
    hz_800 = indexOfFFTFreq(800, size)  #800 HZ
    hz_1600 = indexOfFFTFreq(1600, size)  #1600 HZ
    hz_3200 = indexOfFFTFreq(3200, size)  #3200 HZ
    hz_6400 = indexOfFFTFreq(6400, size)  #6400 HZ

    spans = [
        (hz_0, hz_200),
        (hz_200, hz_400),
        (hz_400, hz_800),
        (hz_800, hz_1600),
        (hz_1600, hz_3200),
        (hz_3200, -1),  # everything above 3200 Hz except the last bin
    ]
    bands = [song_fft[lo:hi] for lo, hi in spans]

    # NOTE(review): the bands are numpy arrays, so this adds each band to
    # its own reverse element-wise instead of concatenating -- confirm.
    bands = [band + list(reversed(band)) for band in bands]

    # Inverse FFT of each band; the result is not used afterwards.
    fft_bands = [np.fft.ifft(band) for band in bands]

    fig, axs = plt.subplots(nrows=2, ncols=1)
    x = range(0, len(bands[0]))
    y = bands[0]
    axs[0].bar(x, y, facecolor='b', alpha=0.5, linewidth=1, width=1)
    axs[0].set_ylabel('')
    axs[0].set_xlabel('')

    plt.savefig("graphs/" + args[0].split('/')[-1] + "_graph.png")
def samples(directory):
    """
    Yield ``(label, signal, sample_frequency)`` for every entry of
    ``directory``.

    Each entry is read with ``wavread``. A signal with more than one
    dimension is reduced to the average of its first two channels. The
    label is the fifth character of the file name, so names shorter than
    five characters raise IndexError.
    """
    for name in os.listdir(directory):
        # os.path.join also works when ``directory`` has no trailing
        # separator, where plain string concatenation built a wrong path.
        filename = os.path.join(directory, name)
        signal, sample_frequency, _ = wavread(filename)

        if signal.ndim > 1:
            transposed = signal.transpose()
            signal = (transposed[0] + transposed[1])/2.0

        yield (name[4], signal, sample_frequency)
Beispiel #46
0
def wavopen(file):  #file => character string
    """
    Read the wav file ``file`` and return ``(data, fs, t, encoding)``.

    data     : the samples converted to nested Python lists
    fs       : the sample rate reported by ``wavread``
    t        : the duration in seconds, rounded up to a whole second
    encoding : the third value returned by ``wavread``
    """
    from scikits.audiolab import wavread
    import numpy
    data, fs, encoding = wavread(file)
    data = data.tolist()
    # Divide as floats: an integer division would already round down and
    # leave nothing for ceil to round up.
    t = int(numpy.ceil(len(data) / float(fs)))
    return data, fs, t, encoding
Beispiel #47
0
def main(args):
    """
    Estimate the tempo of a song by correlating its spectrum with the
    spectra of impulse trains at candidate tempos.

    args[0] is the path of a .wav file whose frames have at least two
    channels (the first two are summed). The best matching tempo is
    printed and a bar chart of the correlation energy per candidate is
    saved as ``graphs/<file name>_graph.png``.
    """
    # Load up the song (must be .wav) into memory
    amp_data, fs, enc = wavread(args[0])

    AMP_MAX = 10000
    # list of BPMs to match
    BPMs = range(30, 180, 5)
    # period = (60 / x seconds / beat) * (44100 samples / second) = (60 / x) (44100) samples / beat
    # NOTE(review): 44100 is hard-coded here and below; the rate returned
    # by wavread (fs) is not used -- confirm.

    # analyze 5 seconds from the middle of the song
    # "//" keeps the slice bounds integers under true division.
    middle = len(amp_data) // 2
    five_seconds = [
        item[0] + item[1] for item in amp_data[middle:middle + 44100 * 5]
    ]

    # compute the FFT of the 5 second portion (all N bins are kept)
    N = len(five_seconds)
    song_fft = np.fft.fft(five_seconds)[0:N]

    correlation_energies = []
    # create FFT wave for each impulse wave generated from the corresponding BPM
    for bpm in BPMs:
        period = int((60.0 / bpm) * 44100)

        # impulse train: AMP_MAX at every multiple of the period, 0 elsewhere
        impulse_train = np.zeros(N)
        impulse_train[::period] = AMP_MAX

        imptrain_fft = np.fft.fft(impulse_train)[0:N]  #ti[k] & tj[k]

        # correlation between the two spectra: sum of |song * impulse| per bin
        correlation_energy = sum(np.abs(song_fft * imptrain_fft))

        correlation_energies.append(correlation_energy)

    # Scan from the highest tempo down, replacing the current pick only on
    # a strictly larger energy.
    largest_i = 0
    for i in reversed(range(0, len(BPMs))):
        if correlation_energies[i] > correlation_energies[largest_i]:
            largest_i = i

    print("BPM: " + str(BPMs[largest_i]))

    fig, axs = plt.subplots(nrows=2, ncols=1)
    x = BPMs
    y = correlation_energies
    axs[0].bar(x, y, facecolor='b', alpha=0.5, linewidth=1, width=1)
    axs[0].set_ylabel('E_BPMs')
    axs[0].set_xlabel('BPM')

    plt.savefig("graphs/" + args[0].split('/')[-1] + "_graph.png")
def main(args):
    """
    Print the candidate tempo (30 to 175 BPM in steps of 5) whose impulse
    train correlates best with an excerpt of a song, and plot the
    correlation energy of every candidate.

    args[0] is the path of a .wav file; the first two channels of each
    frame are summed, so at least two channels are required. The chart is
    saved as ``graphs/<file name>_graph.png``.
    """
    # Load up the song (must be .wav) into memory
    amp_data, fs, enc = wavread(args[0])

    AMP_MAX = 10000
    # list of BPMs to match
    BPMs = range(30, 180, 5)
    # period = (60 / x seconds / beat) * (44100 samples / second) = (60 / x) (44100) samples / beat
    # NOTE(review): the sample rate is assumed to be 44100 throughout;
    # the value returned by wavread is ignored -- confirm.

    # analyze 5 seconds from the middle of the song
    # Floor division so the index stays an integer under true division.
    middle = len(amp_data) // 2
    five_seconds = [item[0] + item[1] for item in amp_data[middle:middle + 44100 * 5]]

    # compute the FFT of the 5 second portion; the slice keeps every bin
    N = len(five_seconds)
    song_fft = np.fft.fft(five_seconds)[0:N]

    correlation_energies = []
    # create FFT wave for each impulse wave generated from the corresponding BPM
    for bpm in BPMs:
        period = int((60.0 / bpm) * 44100)

        # generate the impulse_train: a spike every ``period`` samples
        impulse_train = np.zeros(N)
        impulse_train[::period] = AMP_MAX

        imptrain_fft = np.fft.fft(impulse_train)[0:N] #ti[k] & tj[k]

        # calculate the correlation between the two waves imptrain_fft & song_fft
        correlation_energies.append(sum(np.abs(song_fft * imptrain_fft)))

    # Walk the candidates from last to first and keep the one with the
    # strictly largest energy seen so far.
    largest_i = 0
    for i in reversed(range(0, len(BPMs))):
        if correlation_energies[i] > correlation_energies[largest_i]:
            largest_i = i

    print("BPM: " + str(BPMs[largest_i]))

    fig, axs = plt.subplots(nrows=2, ncols=1)
    x = BPMs
    y = correlation_energies
    axs[0].bar(x, y, facecolor='b', alpha=0.5, linewidth=1, width=1)
    axs[0].set_ylabel('E_BPMs')
    axs[0].set_xlabel('BPM')

    plt.savefig("graphs/" + args[0].split('/')[-1] + "_graph.png")
Beispiel #49
0
 def _read_wav_file(self):
     """
         Load ``<rootpath>/snd/<filename>`` (with ``~`` expanded) into
         ``rawaudio``, ``sr`` and ``fmt`` and return the raw audio.
         When the read raises IOError an error message string is
         returned instead and the attributes are left untouched.
     """
     snd_dir = os.path.join(os.path.expanduser(self.rootpath), 'snd')
     wav_path = os.path.join(snd_dir, self.filename)
     try:
         loaded = wavread(wav_path)
     except IOError:
         return "IOError! WAV read failed!"
     self.rawaudio, self.sr, self.fmt = loaded
     return self.rawaudio
Beispiel #50
0
def file_preprocessing(path):
  """
  Read the wav file ``path`` and return ``(signal, fs, frames)``.

  signal : list holding ``mean`` of every frame read by ``wavread``
  fs     : frame rate reported by the ``wave`` module
  frames : number of frames reported by the ``wave`` module
  """
  data, _, _ = wavread(path)
  signal = [mean(d) for d in data]

  f = wave.open(path, "r")
  try:
    frames = f.getnframes()
    fs = f.getframerate()
  finally:
    # Release the handle even if a header query raises.
    f.close()

  return (signal, fs, frames)
Beispiel #51
0
def wavopen(file):						#file => character string
	"""
	Read the wav file ``file`` and return ``(data, fs, t)``.

	data : the samples converted to nested Python lists
	fs   : the sample rate reported by ``wavread``
	t    : the duration in seconds, rounded up to a whole second
	"""
	from scikits.audiolab import wavread
	import numpy
	data, fs, _ = wavread(file)
	data = data.tolist()
	# Divide as floats: an integer division would already round down and
	# leave nothing for ceil to round up.
	t = int(numpy.ceil(len(data) / float(fs)))
	return data, fs, t
Beispiel #52
0
def read_file(filename):
    """
    Read an audio file and return ``(amplitudes_array, sample_frequency)``.

    The lower-cased extension of ``filename`` must equal ``WAV_EXT``;
    any other extension raises NotImplementedError listing
    ``SUPPORTED_FORMATS``.
    """
    from os import path
    extension = path.splitext(filename)[1].lower()
    if extension != WAV_EXT:
        raise NotImplementedError(
            "Format '%s' not supported. Supported formats are: %s" %
            (extension, ', '.join(SUPPORTED_FORMATS)))
    amplitudes_array, sample_frequency, fmt = audiolab.wavread(filename)
    return amplitudes_array, sample_frequency
Beispiel #53
0
def readfiles(files):
    '''
    Read every path in ``files`` with ``wavread`` and return a list of
    ``audiofile`` objects, in the same order. Each object is built from
    the data, sample rate and encoding of the file plus the path with
    its last four characters removed.
    '''
    # Lazy pairing keeps the read/construct order one file at a time.
    loaded = ((path, wavread(path)) for path in files)
    return [audiofile(data, sr, enc, path[:-4])
            for path, (data, sr, enc) in loaded]
Beispiel #54
0
    def retrieve_file_data(self):
        """
        Load the samples of the audio file, keep every second value of
        the flattened data and store the result, rescaled, as a column
        vector in the private wave attribute.

        With two values per frame this keeps the first value of each
        frame (assumes two-channel input -- TODO confirm). The scale
        factor is 0.8 divided by the maximum of the kept values.
        """
        frames = numpy.array([list(pair) for pair in wavread(self.__audio)[0]])

        # Every other entry of the row-major flattened data, as a column.
        kept = list(frames.flatten())[::2]
        column = numpy.array(kept).reshape(-1, 1)

        scale = 0.8 / max(column)
        self.__wave = numpy.multiply(scale, column)
Beispiel #55
0
def file_misclass_error_printf(dnn_model, aux_model, which_layers, data_dir,
                               file_list, filter_cutoff, dnn_save_file,
                               aux_save_file):
    """
    Classify every file in ``file_list`` with two models and write the
    per-file predictions to two tab-separated text files.

    dnn_model     : network providing ``get_input_space`` and ``fprop``
    aux_model     : classifier with a ``predict`` method, applied to the
                    features returned by ``aggregate_features``
    which_layers  : forwarded to ``aggregate_features``
    data_dir      : directory the entries of ``file_list`` are relative to
    file_list     : relative paths; the part before the first '/' must be
                    one of the ten genre names in the label table below
    filter_cutoff : cutoff of the 4th-order Butterworth filter applied
                    before the FFT, normalized by 22050 / 2
    dnn_save_file : output path for the network predictions
    aux_save_file : output path for the auxiliary-model predictions

    Each output line is ``<file name>\\t<true label>\\t<predicted label>``.
    """
    # Compile the forward pass once instead of once per classified file.
    batch = dnn_model.get_input_space().make_theano_batch()
    fprop = theano.function([batch], dnn_model.fprop(batch))

    # closures
    def dnn_classify(X):
        # Sum the network outputs over all rows of X, then take the
        # highest-scoring class.
        return np.argmax(np.sum(fprop(X), axis=0))

    def aux_classify(X):
        # Majority vote over the per-row predictions.
        Xagg = aggregate_features(dnn_model, X, which_layers)
        votes = np.array(aux_model.predict(Xagg), dtype='int')
        return np.argmax(np.bincount(votes))

    # filter coeffs
    b, a = sp.signal.butter(4, filter_cutoff / (22050. / 2.))

    label_list = {
        'blues': 0,
        'classical': 1,
        'country': 2,
        'disco': 3,
        'hiphop': 4,
        'jazz': 5,
        'metal': 6,
        'pop': 7,
        'reggae': 8,
        'rock': 9
    }
    line_template = '{fname}\t{true_label}\t{pred_label}\n'

    # "with" closes both files even when a file in the list fails.
    with open(dnn_save_file, 'w') as dnn_file, \
            open(aux_save_file, 'w') as aux_file:
        for i, fname in enumerate(file_list):
            print('Processing file {} of {}'.format(i + 1, len(file_list)))
            true_label = label_list[fname.split('/')[0]]

            x, _, _ = audiolab.wavread(os.path.join(data_dir, fname))
            x = sp.signal.lfilter(b, a, x)
            X, _ = compute_fft(x)
            # Only the first 513 columns are kept.
            X = np.array(X[:, :513], dtype=np.float32)

            dnn_pred = dnn_classify(X)
            dnn_file.write(line_template.format(
                fname=fname, true_label=true_label, pred_label=dnn_pred))

            aux_pred = aux_classify(X)
            aux_file.write(line_template.format(
                fname=fname, true_label=true_label, pred_label=aux_pred))
Beispiel #56
0
def createRings(filename, samples):
	"""
	Quantize the loudness profile of a wav file into ``samples`` levels.

	The file is cut into ``samples`` consecutive segments of equal
	length; for each segment the largest sample value (for multi-channel
	data, the largest value across channels) is divided by a step size
	and rounded up. The step size is the overall largest value divided
	by ``int((maxD - minD) / step) + 1``, using the module-level
	``maxD``, ``minD`` and ``step``.

	Returns a list of ``samples`` integers. Trailing frames that do not
	fill a whole segment are ignored.
	"""
	data, fs, enc = wavread(filename)
	if data.ndim == 1: # mono
		maxValues = data
	else: # stereo
		maxValues = data.max(axis = 1)
	# Floor division: the segment length is used as a slice bound and
	# must stay an integer under true division.
	perRing = len(data) // samples
	steps = int((maxD - minD)/step) + 1
	highest = [maxValues[(perRing*i):(perRing*(i+1))].max() for i in range(samples)]
	biggest = max(highest)
	perStep = biggest/steps
	return [int(math.ceil(x/perStep)) for x in highest]