def high_demo(snd, start, stop):
    # highpass whose cutoff sweeps from start to stop (in Hz)
    freq = linspace(start, stop, len(snd))
    normal_freq = 2 * freq / fs
    highpass_y = aphighpass(snd, normal_freq)
    wavwrite(highpass_y, "aphigh_demo.wav", fs)

def write_aubio_onsets(self, onset_list, filepath):
    print("Onsets are: %s" % onset_list)
    audio = MonoLoader(filename=filepath)()
    marker = AudioOnsetsMarker(onsets=onset_list, type='beep')
    marked_audio = marker(audio)
    wavwrite(marked_audio,
             switch_ext(os.path.basename(filepath), 'AUBIOONSETS.wav'),
             44100)

def synth_audio(audiofile, impfile, chns, angle, nsfile=None, snrlevel=None,
                outname=None, outsplit=False):
    FreqSamp, audio = wavfile.read(audiofile)
    audio = audio.astype(np.float32) / np.amax(
        np.absolute(audio.astype(np.float32)))
    gen_audio = np.zeros((audio.shape[0], chns), dtype=np.float32)
    for ch in range(1, chns + 1):
        impulse = np.fromfile('{}D{:03d}_ch{}.flt'.format(impfile, angle, ch),
                              dtype=np.float32)
        gen_audio[:, ch - 1] = np.convolve(audio, impulse, mode='same')
    gen_audio = add_noise(gen_audio, nsfile=nsfile, snrlevel=snrlevel)
    if outname is None:
        return FreqSamp, np.transpose(gen_audio)
    if outsplit:
        for ch in range(chns):
            audiolab.wavwrite(gen_audio[:, ch],
                              '{}_ch{:02d}.wav'.format(outname, ch),
                              fs=FreqSamp, enc='pcm16')
    else:
        audiolab.wavwrite(gen_audio, '{}.wav'.format(outname),
                          fs=FreqSamp, enc='pcm16')
    return

def to_audio(wav_file, index_file):
    audio_snippets = {}
    for dtype, inverted_vocab in get_inverse_vocabs().iteritems():
        if dtype == RHYTHM_GROUP:
            continue
        elif dtype == CHROMA_GROUP:
            inverted_vocab = scale(inverted_vocab)
            inverted_vocab[inverted_vocab < 0.0] = 0.0
        for i, vec in enumerate(inverted_vocab):
            token = dtype + str(i)
            if dtype == CHROMA_GROUP:
                audio_snippets[token] = gen_chroma(vec)
            elif dtype == TIMBRE_GROUP:
                audio_snippets[token] = gen_timbre(vec)
    keys = audio_snippets.keys()
    index = {}
    for i, key in enumerate(keys):
        t = i * audio_time
        end_t = (i + 1) * audio_time
        index[key] = [t, end_t]
    with open(index_file, 'wb') as f:
        json.dump(index, f)
    wavwrite(np.hstack([audio_snippets[key] for key in keys]),
             wav_file, fs=44100)

def record(output, cfilename='SpikeTrain2Play.wav', fs=44100, enc='pcm16'):
    """
    Record the 'sound' produced by a neuron. Takes a spike train as the output.

    >>> record(my_spike_train)
    """
    # from the spike list
    simtime_seconds = (output.t_stop - output.t_start) / 1000.
    (trace, time) = numpy.histogram(output.spike_times * 1000.,
                                    int(fs * simtime_seconds))
    # TODO convolve with a proper spike shape...
    spike = numpy.ones((int(fs / 1000.),))  # one ms
    trace = numpy.convolve(trace, spike, mode='same')
    trace /= numpy.abs(trace).max() * 1.1
    try:
        from scikits.audiolab import wavwrite
    except ImportError:
        print "You need the scikits.audiolab package to produce sounds !"
    wavwrite(trace, cfilename, fs=fs, enc=enc)

def main():
    """
    Main function for processing the specified soundfile through this reverb.
    """
    parser = argparse.ArgumentParser(description='Artificial Reverb')
    # the soundfile is the first argument, with parameter values to follow
    parser.add_argument('soundfile', help='audio file to process', type=validInput)
    parser.add_argument('outfile', help='path to output file', type=validInput)
    parser.add_argument('-w', '--wetdry', default=0.2, type=float,
                        help='amount of wet signal in the mix')
    parser.add_argument('-da', '--damping', default=0.25, type=float,
                        help='amount of high frequency damping')
    parser.add_argument('-de', '--decay', default=0.4, type=float,
                        help='amount of attenuation applied to signal to make it decay')
    parser.add_argument('-pd', '--predelay', default=30, type=float,
                        help='amount of time before starting reverb')
    parser.add_argument('-b', '--bandwidth', default=0.6, type=float,
                        help='amount of high frequency attenuation on input')
    parser.add_argument('-t', '--tankoffset', default=0, type=float,
                        help='amount of time (ms) to increase the last tank delay time')

    # Parse the command-line arguments
    args = parser.parse_args()

    # Get the full path to the soundfile
    soundfilePath = os.path.join(os.getcwd(), args.soundfile)

    # From here on, x refers to the input signal
    x, sampleRate, wavType = wavread(soundfilePath)
    dry = x.copy()
    y = reverbTest(x, sampleRate, args.damping, args.decay, args.predelay,
                   args.bandwidth, args.tankoffset)

    # Apply wet/dry mix
    output = dryWet(dry, y, args.wetdry)

    # Finally write the output file
    wavwrite(transpose(output), args.outfile, sampleRate)

def wavwrite(srcfile, fs, training):
    try:
        mat = io.loadmat(srcfile)
    except ValueError:
        print('Could not load %s' % srcfile)
        return
    dat = mat['dataStruct'][0, 0][0]
    if ds_factor != 1:
        dat = signal.decimate(dat, ds_factor, axis=0, zero_phase=True)
    mn = dat.min()
    mx = dat.max()
    mx = float(max(abs(mx), abs(mn)))
    if training and mx == 0:
        print('skipping %s' % srcfile)
        return
    if mx != 0:
        dat *= 0x7FFF / mx
    dat = np.int16(dat)
    winsize = win_dur * 60 * fs
    stride = 60 * fs
    for elec in range(16):
        aud = dat[:, elec]
        for win in range(nwin):
            dstfile = srcfile.replace('mat', str(win) + '.' + str(elec) + '.wav')
            beg = win * stride
            end = beg + winsize
            clip = aud[beg:end]
            audiolab.wavwrite(clip, dstfile, fs=fs, enc='pcm16')

def process(file):
    # read in the file
    f, sr, enc = wavread(file)
    # compute the Fourier transform & the window times:
    D = librosa.stft(f)
    times = librosa.frames_to_samples(np.arange(D.shape[1]))
    # compute the onset strength envelope:
    env = librosa.onset.onset_strength(y=f, sr=sr)
    assert (len(times) == len(env))
    # compute the onsets we are actually interested in, convert to samples:
    onsets = librosa.onset.onset_detect(y=f, sr=sr)
    onset_samps = librosa.frames_to_samples(onsets)
    assert (onset_samps[-1] <= len(f))
    # create a lookup table for retrieving onset strengths:
    lookup = []
    prevval = 0
    for v in onset_samps:
        for i in xrange(prevval, len(times)):
            if times[i] == v:
                lookup.append(i)
                prevval = i + 1
                break
    # create an empty audio buffer (result):
    result = np.zeros(len(f))
    # write the envelope's onset strength value at every detected onset point:
    for i in xrange(len(lookup)):
        result[onset_samps[i]] = env[lookup[i]]
    # write the result:
    wavwrite(result, file[:-4] + '_proc.wav', sr, enc)
    return

def new_numeral_captcha_on_words(self, fname):
    wordstr = wordstrgen.get_random_wordstr(self.wordbank, self.nwords)
    numstr = wordstrgen.get_random_numstr(self.nnums)
    ensure_dir('temp')
    # these are the filenames of the audio files
    wordaudio = speechsynth.make_audio(wordstr, 'words', './temp/')
    numaudio = speechsynth.make_audio(numstr, 'nums', './temp/')
    # read audio data
    wordaudio_data, fs_word, enc_word = wavread(wordaudio)
    numaudio_data, fs_num, enc_num = wavread(numaudio)
    wordaudio_data = ensure_equal_length(wordaudio_data, numaudio_data)
    # combine audio data, modifying volumes
    captcha_audio = self.noise_vol * wordaudio_data + self.captcha_vol * numaudio_data
    outputfname = self.outputdir + fname
    if os.path.exists(outputfname):
        os.remove(outputfname)
    wavwrite(captcha_audio, outputfname, 22050)
    # return output filename and the answer
    return outputfname, prettify(numstr)

def sibilant_detector(filename):
    """
    The aim of this algorithm is to detect the parts of filename where the
    energy around a target frequency is maximal. It works as follows:
    1- First compute the spectrogram.
    2- Then compute a Gaussian curve centered on the frequency of interest.
       For sibilants this is usually around 6000 Hz.
    3- Multiply the spectrum by the Gaussian in order to weight the spectrum.
    4- Average the resulting signal and normalize.
    5- The peaks in the resulting signal are the moments in time where the
       energy in the target band is strongest.
    """
    sound_data, fs, enc = wavread(filename)

    # Gaussian coefficients
    sigma = 5
    mu = 10000  # mean frequency
    NFFT = 512

    # Spectrogram
    Pxx, freqs, bins, im = specgram(sound_data, NFFT=NFFT, noverlap=128, Fs=fs)
    show()

    # Sibilant detector
    nb_of_windows = Pxx.shape[1]
    nb_of_fft_coefs = Pxx.shape[0]

    # Compute the Gaussian weighting vector and plot it
    weights = weighting_vector(nb_of_fft_coefs, sigma, mu, fs)
    f_wweights = np.linspace(0, fs / 2, len(weights), endpoint=True)
    plot(f_wweights, weights)
    show()

    sibilant_desc = []
    weighted_ffts = []
    # Multiply the weights by the spectrum and show the result
    for i in range(nb_of_windows):
        weighted_fft = Pxx[:, i] * weights
        if len(weighted_ffts) == 0:
            weighted_ffts = weighted_fft
        else:
            weighted_ffts = np.c_[weighted_ffts, weighted_fft]
        sibilant_desc.append(sum(weighted_fft))
    imshow(weighted_ffts, interpolation='nearest', aspect='auto')
    show()

    # Normalize so the descriptor peaks at 1
    sibilant_desc = [float(i) / max(sibilant_desc) for i in sibilant_desc]
    plot(sibilant_desc)
    show()

    # Export the audio around the strongest window
    max_index, max_value = max(enumerate(sibilant_desc),
                               key=operator.itemgetter(1))
    wavwrite(sound_data[(max_index - 5) * NFFT:(max_index + 5) * NFFT],
             'test.wav', fs=44100)

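The `weighting_vector` helper is not shown in this snippet. A minimal sketch of what it plausibly computes, inferred from the call site; the interpretation of `sigma` as a width in bins (with `mu` in Hz) is an assumption, not the project's actual code:

import numpy as np

def weighting_vector(nb_of_fft_coefs, sigma, mu, fs):
    # Gaussian over FFT bins, centered on the target frequency mu (Hz)
    bins = np.arange(nb_of_fft_coefs)
    mu_bin = mu * nb_of_fft_coefs / (fs / 2.0)  # Hz -> bin index
    w = np.exp(-0.5 * ((bins - mu_bin) / float(sigma)) ** 2)
    return w / w.max()
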
def low_demo(snd, start, stop):
    # lowpass whose cutoff sweeps from start to stop (e.g. 100 Hz to 1000 Hz)
    freq = linspace(start, stop, len(snd))
    normal_freq = 2 * freq / fs
    lowpass_y = aplowpass(snd, normal_freq)
    wavwrite(lowpass_y, "aplow_demo.wav", fs)

def envelopefile(file, attack=1, release=10):
    # read in the file:
    f, sr, enc = wavread(file)
    env = Envelope()
    env.configure(attackTime=attack, releaseTime=release)
    result = env(essentia.array(f))
    wavwrite(result, file[:-4] + '_env.wav', sr, enc)
    return

def ogg_to_wav(source, target):
    """
    source : source audio file
    target : target audio file
    """
    x, fs, enc = oggread(source)
    WavFileName = target
    wavwrite(x, WavFileName, fs, enc='pcm24')

def wav_to_aif(source, target):
    """
    source : source audio file
    target : target audio file
    """
    x, fs, enc = wavread(str(source))
    AifFileName = target
    wavwrite(x, AifFileName, fs, enc='pcm24')

def persist(self, filepath=None):
    """
    Saves the mosaic to the location on disk indicated by the
    `filepath` parameter.
    """
    if filepath:
        self.filepath = filepath
    wavwrite(self.data, self.filepath, self.sample_rate)

def add_boundaries(wavfile, boundaries, output='output.wav',
                   boundsound="sounds/bell.wav", start=0, end=None):
    """Adds a cowbell sound at each boundary and saves the result into a new wav file.

    @param wavfile string: Input wav file (sampled at 11025Hz or 44100Hz).
    @param boundaries np.array: Set of times representing the boundaries (in seconds).
    @param output string: Name of the output wav file.
    @param boundsound string: Sound to add to the original file.
    @param start float: Start time (in seconds).
    @param end float: End time (in seconds).
    """
    OFFSET = 0.0  # offset time in seconds
    x, fs = read_wav(wavfile)
    xb, fsb = read_wav(boundsound)

    # Normalize
    x /= x.max()

    # Copy the input wav file to the output
    out = np.zeros(x.size + xb.size + 1000)
    out[:x.size] = x / 3.0

    # Add boundaries
    for bound in boundaries:
        start_idx = int((bound + OFFSET) * fs)
        end_idx = start_idx + xb.size
        read_frames = out[start_idx:end_idx].size
        out[start_idx:end_idx] += xb[:read_frames]

    # Cut track if needed
    start_time = start * fs
    if start_time < 0:
        start_time = 0
    if end is None:
        end_time = len(out)
    else:
        end_time = end * fs
    if end_time > len(out):
        end_time = len(out)
    out = out[int(start_time):int(end_time)]

    # Write output wav
    audiolab.wavwrite(out, output, fs=fs)

    # Convert to MP3 and delete the wav
    dest_mp3 = output.replace(".wav", ".mp3")
    wav2mp3(output, dest_mp3)
    os.remove(output)
    print "Wrote %s" % dest_mp3

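`read_wav` and `wav2mp3` are project helpers that are not shown here. A plausible minimal `read_wav` on top of scikits.audiolab, matching only the two-value unpacking at the call sites above (an assumption, not the project's actual code):

from scikits.audiolab import wavread

def read_wav(path):
    # wavread returns (data, sample_rate, encoding); the callers above
    # only use the first two values
    x, fs, enc = wavread(path)
    return x, fs
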
def mono_to_stereo(source, target):
    import numpy as np
    f = Sndfile(source, 'r')
    if f.channels == 1:
        # duplicate the mono channel to produce a stereo file
        x, fs, enc = wavread(source)
        wavwrite(np.array([x, x]).transpose(), target, fs, enc='pcm24')

def wav_to_mono(source, target):
    """
    source : source audio file
    target : target audio file
    """
    f = Sndfile(source, 'r')
    if f.channels != 1:
        # to mono: keep only the first channel
        x, fs, enc = wavread(source)
        wavwrite(x[:, 0], target, fs, enc='pcm24')

def aif_to_wav(source, target):
    """
    source : source audio file
    target : target audio file
    """
    try:
        x, fs, enc = aiffread(str(source))
        WavFileName = target
        wavwrite(x, WavFileName, fs, enc='pcm24')
    except:
        print "File is not aif"

def cut_silence_in_sound(source, target, rmsTreshhold=-40, WndSize=128):
    """
    source : source audio file
    target : output sound

    This function cuts the silence at the beginning and at the end of an
    audio file. It's useful for normalizing the length of the audio stimuli
    in an experiment. The default parameters were tested with normal speech.
    """
    x, fs, enc = wavread(str(source))

    # Find where the silence at the beginning ends
    index = 0
    beginning = 0
    while index + WndSize < len(x):
        DataArray = x[int(index):int(index + WndSize)]
        rms = np.sqrt(np.mean(np.absolute(DataArray) ** 2))
        rms = lin2db(rms)
        index = 0.5 * WndSize + index
        if rms > rmsTreshhold:
            beginning = int(index)
            print beginning / 44100
            break

    # Find where the silence at the end starts (scan the reversed signal)
    index = 0
    final = 0
    x_rev = list(reversed(x))
    while index + WndSize < len(x_rev):
        DataArray = x_rev[int(index):int(index + WndSize)]
        rms = np.sqrt(np.mean(np.absolute(DataArray) ** 2))
        rms = lin2db(rms)
        index = 0.5 * WndSize + index
        if rms > rmsTreshhold:
            final = int(index)
            print (len(x) - final) / 44100
            break

    # Write the sound without the leading and trailing silences
    wavwrite(x[beginning:len(x) - final], target, fs, enc='pcm24')

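`lin2db` is an external helper; presumably it is the standard linear-amplitude-to-decibel conversion, sketched here under that assumption:

import numpy as np

def lin2db(lin):
    # 20*log10 of a linear amplitude; the small floor avoids log(0) on digital silence
    return 20 * np.log10(max(lin, 1e-12))
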
def generate_silence_sound(duration, fs, name, enc="pcm16"):
    """
    duration : in seconds
    fs : sampling frequency
    name : file name to generate
    enc : pcm16, pcm24 ...
    """
    import numpy as np
    from scikits.audiolab import wavwrite
    data = np.zeros(int(duration * fs))
    wavwrite(data, name, fs, enc)

def wavwrite(frames, outfile, rate, enc):
    log.debug('Writing file %s', outfile)
    temp = None
    if outfile.endswith('.mp3'):
        temp = tempfile.mktemp('.wav', 'b')
        audiolab.wavwrite(frames.compressed(), temp, rate, enc)
        args = ['ffmpeg', '-y', '-i', temp, outfile]
        log.debug('Calling ffmpeg: %s', args)
        subprocess.call(args, stderr=subprocess.PIPE)
        os.unlink(temp)
    else:
        audiolab.wavwrite(frames.compressed(), outfile, rate, enc)

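This wrapper expects a masked array (`frames.compressed()` drops masked samples) and routes `.mp3` targets through an external ffmpeg binary. A hypothetical call, assuming a masked mono signal; the file names and signal are illustrative only:

import numpy as np
import numpy.ma as ma

# one second of 440 Hz at 44.1 kHz, wrapped as a masked array
t = np.arange(44100) / 44100.0
sig = ma.masked_array(0.5 * np.sin(2 * np.pi * 440 * t))
wavwrite(sig, 'tone.mp3', 44100, 'pcm16')  # converted via ffmpeg
wavwrite(sig, 'tone.wav', 44100, 'pcm16')  # written directly
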
def match_dir(props, prop, outdir, ext):
    ref_prop = min(props.keys(), key=lambda x: props[x][prop])
    ref_peak = max(props.keys(),
                   key=lambda x: (props[x]['peak'] * props[ref_prop][prop] / props[x][prop]))
    for f in props:
        a = props[f]['sig'] * (props[ref_prop][prop] /
                               (props[f][prop] * props[ref_peak]['peak']))
        bname = os.path.basename(f)
        wavwrite(a,
                 os.path.join(outdir, os.path.splitext(bname)[0] + ext + '.wav'),
                 props[f]['fs'], props[f]['enc'])

def __init__(self, start, end, frames, save_soundfile=False, secs_per_block=2):
    self.start = start
    self.end = end
    self.frames = frames
    tmpwav = tempfile.mktemp('.wav')
    self.soundfile = tmpwav
    audiolab.wavwrite(self.frames, tmpwav, 44100, 'pcm16')
    log.debug('Calculating butterscotch')
    self.signature = audioprocessing.butterscotch(tmpwav,
                                                  secs_per_block=secs_per_block)
    log.debug('Done')
    if not save_soundfile:
        os.unlink(tmpwav)

def execute_flac_convert():
    """
    Cycles through test_data, converting all flac files to wav.
    Includes a utility to remove spaces and problem characters from file names.
    """
    files = [f for f in glob('*.flac')]
    for af in files:
        x = flacread(af)[0]
        log.debug("Found a flac file: '%s'" % af)
        n = switch_ext(strip_all(af), '.wav')
        print ("Converting '%s' to: '%s'" % (af, n))
        wavwrite(x, n, 44100)

def IndexFileInFolder(FolderName):
    for file in glob.glob(FolderName + "/*.wav"):  # wav files
        x, fs, enc = wavread(str(file))
        WndSize = 16384
        rmsTreshhold = -50
        index = 0
        NbofWrittenFiles = 1
        while index + WndSize < len(x):
            DataArray = x[index:index + WndSize]
            rms = np.sqrt(np.mean(np.absolute(DataArray) ** 2))
            rms = Lin2db(rms)
            index = WndSize + index
            if rms > -55:
                end = 0
                beginning = index
                index = WndSize + index
                while rms > -55:
                    if index + WndSize < len(x):
                        index = WndSize + index
                        DataArray = x[index:index + WndSize]
                        rms = np.sqrt(np.mean(np.absolute(DataArray) ** 2))
                        rms = Lin2db(rms)
                        end = index
                    else:
                        break
                # if the segment is over 500 ms long, write it
                if (end - beginning) > (fs / 2):
                    duree = (end - beginning) / float(fs)
                    print "duration: " + str(duree)
                    beginning = beginning - WndSize
                    if beginning < 0:
                        beginning = 0
                    end = end + WndSize
                    if end > len(x):
                        end = len(x)
                    name = os.path.splitext(str(file))[0]
                    name = os.path.basename(name)
                    wavwrite(x[beginning:end],
                             "Indexed/" + FolderName + "/" + name + "_" +
                             str(NbofWrittenFiles) + ".wav",
                             fs, enc='pcm24')
                    NbofWrittenFiles = NbofWrittenFiles + 1

def clicktrack(file):
    # read in the file:
    f, sr, enc = wavread(file)
    env = Envelope()
    env.configure(attackTime=0, releaseTime=5)
    curve = env(essentia.array(f))
    result = np.zeros(len(curve))
    i = 1
    while i < len(curve):
        if curve[i] - curve[i - 1] > 0.05:
            # record a click at the onset
            result[i] = curve[i]
            # advance the playhead by 1100 samples (~25 ms at 44.1 kHz)
            # to avoid closely spaced clicks
            i += 1100
            continue
        i += 1
    wavwrite(result, file[:-4] + '_clicks.wav', sr, enc)
    return

def makeTransients(v, prefix):
    wavwrite(transients(v, trans, 100), prefix + v[-9:-4] + '_transLong.wav', 44100, 'pcm24')
    wavwrite(transients(v, trans, 20), prefix + v[-9:-4] + '_transShort.wav', 44100, 'pcm24')
    wavwrite(transients(v, bulbs, 100), prefix + v[-9:-4] + '_bulbs.wav', 44100, 'pcm24')
    wavwrite(transients(v, tiny, 100), prefix + v[-9:-4] + '_tiny.wav', 44100, 'pcm24')

def normalize_target_audio(input_file='moviehires_endpos_beta02.imatsh.wav',
                           sources_expr='/home/mkc/Music/GoldbergVariations/*48_1.wav',
                           write_me=False, amp_factor=0.5, proc_audio=True):
    """
    Per-variation normalization of a concatenated imatsh file, using the
    individual sources as locators.
    Assumes that input_file and the source files have the same sample rate.

    inputs:
        input_file  - the file to be processed (locally normalized)
        sources_expr- glob expression for the source files
        write_me    - write output files when True [False]
        amp_factor  - amplitude change factor (proportion of full-scale
                      normalization) [0.5]
        proc_audio  - whether to process target audio using source audio info [True]
    outputs:
        sample_locators - sample locators for each variation
        audio_summaries - peak and rms values for each variation
    output files:
        output_file = {input_file_stem}+'norm.'+{input_ext}
    """
    # Compute peak and rms per source file
    flist = glob.glob(sources_expr)
    flist.sort()
    sample_locators = [0]
    audio_summaries = []
    ext_pos = input_file.rindex('.')
    outfile_stem, ext = input_file[:ext_pos], input_file[ext_pos + 1:]
    for i, f in enumerate(flist):
        x, sr, fmt = skaud.wavread(f)
        print f, sr, fmt
        if len(x.shape) > 1:
            x = x[:, 0]  # Take left channel only
        sample_locators.extend([len(x)])
        audio_summaries.append([max(abs(x)), np.sqrt(np.mean(x ** 2))])
        if proc_audio:
            y, sr_y, fmt_y = skaud.wavread(input_file,
                                           first=np.cumsum(sample_locators)[-2],
                                           last=sample_locators[-1])
            if sr != sr_y:
                raise ValueError("input and source sample rates don't match: %d,%d"
                                 % (sr, sr_y))
            audio_summaries.append([max(abs(y[:, 0])), np.sqrt(np.mean(y[:, 0] ** 2))])
            max_val = audio_summaries[-1][0]
            rms_val = audio_summaries[-1][1]
            norm_cf = amp_factor / max_val + (1 - amp_factor)
            outfile = outfile_stem + '_%02d.%s' % (i + 1, ext)
            max_amp_val = norm_cf * max_val
            rms_amp_val = norm_cf * rms_val
            print '%s: nrm=%05.2fdB, peak=%05.2fdB, *peak=%05.2fdB, rms=%05.2fdB, *rms=%05.2fdB' % (
                outfile, dB(norm_cf), dB(max_val), dB(max_amp_val), dB(rms_val), dB(rms_amp_val))
            if write_me:
                skaud.wavwrite(norm_cf * y, outfile, sr, fmt)
    return np.cumsum(sample_locators), np.array(audio_summaries)

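`dB` is an unshown helper applied to linear gains and amplitudes above; it is presumably the standard conversion, sketched here under that assumption:

import numpy as np

def dB(x):
    # linear amplitude or gain to decibels (assumed definition)
    return 20 * np.log10(x)
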
def procFile(v, prefix):
    wavwrite(transients(v, trans, 20), prefix + v[-9:-4] + '_trans00.wav', 44100, 'pcm24')
    wavwrite(transients(v, trans, 10), prefix + v[-9:-4] + '_trans01.wav', 44100, 'pcm24')
    wavwrite(resonances(v, reson, 10), prefix + v[-9:-4] + '_reson00.wav', 44100, 'pcm24')
    wavwrite(resonances(v, reson, 15), prefix + v[-9:-4] + '_reson01.wav', 44100, 'pcm24')
    return

def TestBPDN2():
    # ________________________________________
    print 'Test: basis pursuit decomposition'
    fs = 8000
    btmp = audiolab.wavread('glockenspiel.wav')[0]
    b = samplerate.resample(btmp, fs / 44100., 'sinc_best')
    L = len(b)
    A = GaborBlock(L, 1024)
    B = GaborBlock(A.M, 64)
    C = DictionaryUnion(A, B)
    b = np.hstack((b, np.zeros(C.M - L)))
    e = 1e-2
    x = BPDN(C, b, e, 100)
    ye = np.real(C.dot(x))
    print 'Error (should be <= %f): %f' % (e, np.sum((b - ye) ** 2))
    print '----------------------------------------'
    xtone = x[:A.N]
    xtrans = x[A.N:]
    ytone = np.real(A.dot(xtone))
    ytrans = np.real(B.dot(xtrans))
    audiolab.wavwrite(ytone, 'ytone.wav', fs)
    audiolab.wavwrite(ytrans, 'ytrans.wav', fs)
    # tonal decomposition
    m = np.log10(np.abs(A.conj().transpose().dot(ytone)))
    tfgrid = np.reshape(range(0, A.N), (A.N / A.fftLen, A.fftLen))
    tfgrid = tfgrid[:, :A.fftLen / 2 + 1]
    pyplot.subplot(2, 1, 1)
    pyplot.imshow(m[tfgrid].transpose(), aspect='auto',
                  interpolation='bilinear', origin='lower')
    # transient decomposition
    m = np.log10(np.abs(B.conj().transpose().dot(ytrans)))
    tfgrid = np.reshape(range(0, B.N), (B.N / B.fftLen, B.fftLen))
    tfgrid = tfgrid[:, :B.fftLen / 2 + 1]
    pyplot.subplot(2, 1, 2)
    pyplot.imshow(m[tfgrid].transpose(), aspect='auto',
                  interpolation='bilinear', origin='lower')
    pyplot.show()

def fastICA(mix_file, jamming_file):
    sig1, fs1, enc1 = wavread(mix_file)
    sig2, fs2, enc2 = wavread(jamming_file)
    sig1, sig2 = chop_sig(sig1, sig2)
    wavwrite(array([sig1, sig2]).T, "mixed.wav", fs1, enc1)
    # Load in the stereo file
    recording, fs, enc = wavread("mixed.wav")
    # Perform the FastICA algorithm on the two channels
    sources = fastica(recording)
    # Collect the first component of each output frame
    m = []
    for k in sources:
        m.append(k[0])
    # Write back to a file
    wavwrite(array(m), "sources.wav", fs, enc)

def trackify(path, filename):
    clips = readfiles(getfiles(path))
    indices = range(len(clips))
    lengths = []
    for c in clips:
        lengths.append(lenSec(c))
    maxFade = min(lengths)
    minFade = maxFade / 3
    shuffle(indices)
    result = crossfade(clips[indices[0]], clips[indices[1]],
                       random.random() * (maxFade - minFade) + minFade)
    for i in xrange(2, len(indices)):
        maxFade = lenSec(clips[indices[i]])
        result = crossfade(result, clips[indices[i]],
                           random.random() * (.666 * maxFade) + (.333 * maxFade))
    wavwrite(result.data, filename, result.sr, result.enc)
    return

def write_file(wave, filename):
    from os import path
    ext = path.splitext(filename)[1].lower()
    if ext == WAV_EXT:
        return audiolab.wavwrite(wave.waveform, filename, fs=wave.samplerate)
    else:
        raise NotImplementedError(
            "Format '%s' not supported. Supported formats are: %s"
            % (ext, ', '.join(SUPPORTED_FORMATS)))

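`write_file` expects an object exposing `.waveform` and `.samplerate` attributes, plus module-level `WAV_EXT` and `SUPPORTED_FORMATS` constants. A hypothetical minimal caller; every name except `write_file` and `audiolab` is an assumption made for illustration:

import collections
import numpy as np

WAV_EXT = '.wav'
SUPPORTED_FORMATS = [WAV_EXT]
Wave = collections.namedtuple('Wave', ['waveform', 'samplerate'])

t = np.arange(44100) / 44100.0
tone = Wave(waveform=0.5 * np.sin(2 * np.pi * 440 * t), samplerate=44100)
write_file(tone, 'tone.wav')
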
def wavwrite(srcfile):
    try:
        mat = io.loadmat(srcfile)
    except ValueError:
        print('Could not load %s' % srcfile)
        return
    dat = mat['dataStruct'][0, 0][0]
    mn = dat.min()
    mx = dat.max()
    mx = float(max(abs(mx), abs(mn)))
    if mx != 0:
        dat *= 0x7FFF / mx
    dat = np.int16(dat)
    for elec in range(16):
        dstfile = srcfile.replace('mat', str(elec) + '.wav')
        aud = dat[:, elec]
        audiolab.wavwrite(aud, dstfile, fs=400, enc='pcm16')

def generateSamples(label, background):
    print 'Saving ' + label
    output_folder = OUTPUT_DIRECTORY + '/' + label + '/'
    input_folder = INPUT_DIRECTORY + '/' + label + '/'
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    wavfiles = [input_folder + '/' + wavfile for wavfile in os.listdir(input_folder)]
    print(wavfiles)
    nr = 1
    for file in wavfiles:
        data, fs, _ = wavread(file)
        data_len = len(data)
        for i in range(0, SAMPLES_PER_ORYGINAL):
            start = random.randint(0, len(background) - data_len)
            ratio = 0.25 + random.random() * 0.5  # range 0.25 - 0.75
            sample = (1 - ratio) * data + ratio * background[start:start + data_len]
            print 'Saving ' + label + '/' + str(nr) + '.wav'
            wavwrite(sample, output_folder + str(nr) + '.wav', fs=fs)
            nr += 1

def write(self, file_path):
    """
    Write the audio data to file.
    Return ``True`` on success, or ``False`` otherwise.

    This function works only for mono wav files!

    :param file_path: the path of the output file to be written
    :type file_path: string (path)
    :rtype: bool

    .. versionadded:: 1.2.0
    """
    self._log(["Writing audio file '%s'...", file_path])
    try:
        wavwrite(self.audio_data, file_path,
                 self.audio_sample_rate, self.audio_format)
    except:
        self._log("Error writing audio file", severity=Logger.CRITICAL)
        return False
    return True

def recordAudio():
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 44100
    RECORD_SECONDS = 1
    WAVE_OUTPUT_FILENAME = "audioOriginal.wav"

    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                    input=True, frames_per_buffer=CHUNK)
    print("* recording:")
    frames = []
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)
    print("* Finished recording.")
    stream.stop_stream()
    stream.close()
    p.terminate()

    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()

    # Duplicate the audio and save it as Actual
    frames, fs, encoder = audiolab.wavread('audioOriginal.wav')
    audiolab.wavwrite(frames, 'audioActual.wav', fs)

def main():
    originalArray, noisyArray, fs = fn.noisemixer(-10)
    print 'snr original: ' + str(fn.snrcalculation(originalArray, noisyArray)) + ' dB'

    order = 5
    butterworthcutoff = 700  # Hz
    walevetcutoff = 11025

    btt_filtered = fn.butter_lowpass_filter(originalArray + noisyArray,
                                            butterworthcutoff, fs, order)
    print 'butterworth filtered data snr: ' + str(
        fn.snrcalculation(originalArray, originalArray - btt_filtered))
    wavwrite(btt_filtered, 'butterworth_lowpass.wav', fs)

    wv_filtered = fn.wavelet_hard(originalArray + noisyArray,
                                  walevetcutoff, fs, order)
    print 'wavelet_hard filtered data snr: ' + str(
        fn.snrcalculation(originalArray, originalArray - wv_filtered))
    wavwrite(wv_filtered, 'wavelet_hard.wav', fs)

def batchChordGenerate(taskList, prefix):
    '''
    inputs: a list of lists
    each sublist contains the following values @ indices:
        0 : vox collection index
        1 : vox sample index
        2 : a list: [sample #, transposition semitones, transposition cents]
        3 : ... and so on ...
    '''
    for i, t in enumerate(taskList):
        print "begin task for vocal gesture " + p(t[0]) + '_' + p(t[1])
        # grab the vocal gesture:
        gestur = vox[t[0]][t[1]]
        # compute clicks to be applied in all files:
        idx, l = clicks(gestur)
        clk = [idx, l]
        # compute envelope to be applied to all files:
        env = envelope(gestur, 10, 30)
        env *= (1 / env.max())  # normalize envelope
        # proceed with computing:
        fp = prefix + '_' + p(i) + '__' + p(t[0]) + '_' + p(t[1]) + '__'
        sr = None
        enc = None
        for j in xrange(2, len(t)):
            fname = fp + str(j - 2) + '__' + str(t[j][0]) + '.wav'
            print "  computing " + str(j) + ' : ' + fname
            transpratio = tr(t[j][1], t[j][2])
            rezs = []  # list of np.arrays containing transposed resonances
            for f in coll_safe[t[j][0]]:
                x, sr, enc = wavread(f)
                rezs.append(resample(x, transpratio, 'sinc_best'))
            wavwrite(resonancesChord(None, rezs, 30, clk, env),
                     fname, 44100, 'pcm24')
    return

from mdp import fastica
from scikits.audiolab import wavread, wavwrite
from numpy import abs, max

# Normalize each file's level in place.
#for i in range(0, 1500):
#    recording, fs, enc = wavread('piano/wav/' + str(i) + '.wav')
#    recording /= (5 * max(abs(recording), axis=0))
#    wavwrite(recording, 'piano/wav/' + str(i) + '.wav', fs, enc)

#for i in range(0, 1500):
#    recording, fs, enc = wavread('clarinet/wav/' + str(i) + '.wav')
#    recording /= (5 * max(abs(recording), axis=0))
#    wavwrite(recording, 'clarinet/wav/' + str(i) + '.wav', fs, enc)

for i in range(0, 6000):
    recording, fs, enc = wavread('wav/' + str(i) + '.wav')
    recording /= (5 * max(abs(recording), axis=0))
    wavwrite(recording, 'wav/' + str(i) + '.wav', fs, enc)

def synthesize(self, text_file, audio_file_path, quit_after=None, backwards=False):
    """
    Synthesize the text contained in the given fragment list
    into a ``wav`` file.

    :param text_file: the text file to be synthesized
    :type text_file: :class:`aeneas.textfile.TextFile`
    :param audio_file_path: the path to the output audio file
    :type audio_file_path: string (path)
    :param quit_after: stop synthesizing as soon as reaching this many seconds
    :type quit_after: float
    :param backwards: synthesize from the end of the text file
    :type backwards: bool
    """
    # time anchors
    anchors = []

    # initialize time
    current_time = 0.0

    # waves is used to concatenate all the fragments' WAV files
    waves = numpy.array([])

    # espeak wrapper
    espeak = ESPEAKWrapper(logger=self.logger)

    if quit_after is not None:
        self._log(["Quit after reaching %.3f", quit_after])
    if backwards:
        self._log("Synthesizing backwards")

    # for each fragment, synthesize it and concatenate it
    num = 0
    num_chars = 0
    fragments = text_file.fragments
    if backwards:
        fragments = fragments[::-1]
    for fragment in fragments:
        # synthesize and get the duration of the output file
        self._log(["Synthesizing fragment %d", num])
        handler, tmp_destination = tempfile.mkstemp(
            suffix=".wav",
            dir=gf.custom_tmp_dir()
        )
        duration = espeak.synthesize(
            text=fragment.text,
            language=fragment.language,
            output_file_path=tmp_destination
        )

        # store for later output
        anchors.append([current_time, fragment.identifier, fragment.text])

        # increase the character counter
        num_chars += fragment.characters

        # concatenate to buffer
        self._log(["Fragment %d starts at: %f", num, current_time])
        if duration > 0:
            self._log(["Fragment %d duration: %f", num, duration])
            current_time += duration
            data, sample_frequency, encoding = wavread(tmp_destination)
            #
            # TODO this might result in memory swapping
            # if we have a large number of fragments
            # is there a better way?
            #
            # NOTE since append cannot be in place,
            # it seems that the only alternative is pre-allocating
            # the destination array,
            # possibly truncating or extending it as needed
            #
            if backwards:
                waves = numpy.append(data, waves)
            else:
                waves = numpy.append(waves, data)
        else:
            self._log(["Fragment %d has zero duration", num])

        # remove temporary file
        self._log(["Removing temporary file '%s'", tmp_destination])
        os.close(handler)
        os.remove(tmp_destination)
        num += 1

        if (quit_after is not None) and (current_time > quit_after):
            self._log(["Quitting after reached duration %.3f", current_time])
            break

    # output WAV file, concatenation of synthesized fragments
    self._log(["Writing audio file '%s'", audio_file_path])
    wavwrite(waves, audio_file_path, sample_frequency, encoding)

    # return the time anchors
    # TODO anchors do not make sense if backwards == True
    self._log(["Returning %d time anchors", len(anchors)])
    self._log(["Current time %.3f", current_time])
    self._log(["Synthesized %d characters", num_chars])
    return (anchors, current_time, num_chars)

from tempfile import mkstemp
from os.path import join, dirname
from os import remove
from scikits.audiolab import wavread, wavwrite

(tmp, fs, enc) = wavread('test.wav')
if tmp.ndim < 2:
    nc = 1
else:
    nc = tmp.shape[1]

print "The file has %d frames, %d channel(s)" % (tmp.shape[0], nc)
print "FS is %f, encoding is %s" % (fs, enc)

fd, cfilename = mkstemp('pysndfiletest.wav')
try:
    wavwrite(tmp, cfilename, fs=16000, enc='pcm24')
finally:
    remove(cfilename)

x = n.linspace(0, 2 * n.pi, Dv, endpoint=False)
tabv = n.sin(x)  # sine table for the tremolo

# Vibrato pattern
ii = n.arange(fa * dur)  # samples in dur seconds
gv = n.array(ii * fv * float(D) / fa, n.int)  # indices for table lookup

### The sound itself
tab = n.linspace(-1, 1, D)  # sawtooth
tv = 10 ** (tab[gv % D] * mu / 20)  # instantaneous amplitude deviation per sample
gi = n.array(ii * f * (Dv / float(fa)), n.int)  # total table movement, already integer
ti = tabv[gi % Dv] * tv
p.plot(ti, label=r"$T_i^{tr(f'=1,5Hz)}=\{t_i.a_i\}_0^{\Lambda-1}$", linewidth=2)
ti = ((ti - ti.min()) / (ti.max() - ti.min())) * 2 - 1  # normalizing
a.wavwrite(ti, "tremolo.wav", fa)

gi = n.array(ii * (D / float(fa)) * f, n.int) % D
t = tab[gi]
a.wavwrite(t, "original.wav", fa)

p.ylabel(r"amplitude $\quad \rightarrow $", fontsize=16)
p.xlabel(r"$i\quad \rightarrow$", fontsize=26)
p.xlim(-2000, ii[-1] + 2000)
p.ylim(-4.3, 6)
p.xticks((0, 20000, 40000, 60000, 80000, 88200),
         (r"0", 20000, 40000, 60000, 80000, 88200))
p.plot(tv, label=r"$a_i=10^{t_i'\,\frac{V_dB=12\,dB}{20}}$", linewidth=4)
p.legend(loc="upper left")
ltext = p.gca().get_legend().get_texts()

from mdp import fastica
from scikits.audiolab import wavread, wavwrite
from numpy import abs, max

# Load in the stereo file
recording, fs, enc = wavread('test.wav')

# Perform the FastICA algorithm on the two channels
sources = fastica(recording)

# The output levels of this algorithm are arbitrary, so normalize them to 1.0.
sources /= (5 * max(abs(sources), axis=0))

# Write back to a file
wavwrite(sources, 'testout.wav', fs, enc)

for d in dirs:
    output_dir = os.path.join(root, d + '_' + mode)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

origFiles = glob.glob(os.path.join(root, 'Original', '*.wav'))
for d in dirs:
    noisyFiles = glob.glob(os.path.join(root, d, '*.wav'))
    for a, b in zip(origFiles, noisyFiles):
        f = os.path.split(b)[1]
        print 'Processing %s' % f
        print '-----------------------------'
        y = audiolab.wavread(a)[0]
        z = audiolab.wavread(b)[0]
        r = y - z
        inSNR = 10 * np.log10(y.dot(y) / r.dot(r))
        nvar = np.sum(np.abs(y) ** 2) / (10 ** (inSNR / 10))
        ye = Process(z, nvar, mode)
        r = y - ye
        outSNR = 10 * np.log10(y.dot(y) / r.dot(r))
        print 'File: %s, Input SNR = %f, output SNR = %f' % (f, inSNR, outSNR)
        audiolab.wavwrite(ye, os.path.join(root, d + '_' + mode, 't_' + f), 44100.)

pred = np.argmax(np.sum(fprop(X_adv), axis=0))
if pred == t:
    dnn_file.write('{}\t'.format(int(out_snr + .5)))
else:
    dnn_file.write('{}\t'.format('na'))

# aux prediction
if args.aux_model:
    X_adv_agg = aggregate_features(dnn_model, X_adv, which_layers)
    pred = np.argmax(np.bincount(np.array(aux_model.predict(X_adv_agg),
                                          dtype='int')))
    if pred == t:
        aux_file.write('{}\t'.format(int(out_snr + .5)))
    else:
        aux_file.write('{}\t'.format('na'))

# SAVE ADVERSARY FILES
out_file = os.path.join(args.out_path,
                        '{fname}.{label}.adversary.{snr}dB.wav'.format(
                            fname=fname, label=label_list[t], snr=int(out_snr + .5)))
audiolab.wavwrite(x_adv, out_file, fs)

dnn_file.write('\n')
if args.aux_model:
    aux_file.write('\n')

dnn_file.close()
if args.aux_model:
    aux_file.close()

def synthesize(self, text_file, audio_file_path):
    """
    Synthesize the text contained in the given fragment list
    into a ``wav`` file.

    :param text_file: the text file to be synthesized
    :type text_file: :class:`aeneas.textfile.TextFile`
    :param audio_file_path: the path to the output audio file
    :type audio_file_path: string (path)
    """
    # time anchors
    anchors = []

    # initialize time
    current_time = 0.0

    # waves is used to concatenate all the fragments' WAV files
    waves = numpy.array([])

    # espeak wrapper
    espeak = ESPEAKWrapper(logger=self.logger)

    # for each fragment, synthesize it and concatenate it
    num = 0
    for fragment in text_file.fragments:
        # synthesize and get the duration of the output file
        self._log("Synthesizing fragment %d" % num)
        handler, tmp_destination = tempfile.mkstemp(
            suffix=".wav",
            dir=gf.custom_tmp_dir()
        )
        duration = espeak.synthesize(
            text=fragment.text,
            language=fragment.language,
            output_file_path=tmp_destination
        )

        # store for later output
        anchors.append([current_time, fragment.identifier, fragment.text])

        # concatenate to buffer
        self._log("Fragment %d starts at: %f" % (num, current_time))
        if duration > 0:
            self._log("Fragment %d duration: %f" % (num, duration))
            current_time += duration
            data, sample_frequency, encoding = wavread(tmp_destination)
            #
            # TODO this might result in memory swapping
            # if we have a large number of fragments
            # is there a better way?
            #
            # waves = numpy.concatenate((waves, data))
            #
            # append seems faster than concatenate, as it should
            waves = numpy.append(waves, data)
        else:
            self._log("Fragment %d has zero duration" % num)

        # remove temporary file
        self._log("Removing temporary file '%s'" % tmp_destination)
        os.close(handler)
        os.remove(tmp_destination)
        num += 1

    # output WAV file, concatenation of synthesized fragments
    self._log("Writing audio file '%s'" % audio_file_path)
    wavwrite(waves, audio_file_path, sample_frequency, encoding)

    # return the time anchors
    self._log("Returning %d time anchors" % len(anchors))
    return anchors

# Violet noise:
# each octave gains 6 dB
fator = 10. ** (6 / 20.)
alphai = fator ** (n.log2(fi[i0:] / f0))
c = n.copy(coefs)
c[i0:] = c[i0:] * alphai
# real part even, imaginary part odd
c[N/2+1:] = n.real(c[1:N/2])[::-1] - 1j * n.imag(c[1:N/2])[::-1]

ruido = n.fft.ifft(c)
r = n.real(ruido)
r = ((r - r.min()) / (r.max() - r.min())) * 2 - 1
a.wavwrite(r, 'violeta.wav', 44100)

p.subplot(521)
p.title(u'violet noise')
p.ylim(-10, 220)
p.plot(n.log10(fi[i0:len(fi)/2]), 20 * n.log2(n.abs(c[i0:len(c)/2])))
p.subplot(522)
p.plot(r[ii:ie])
p.plot(r[ii:ie], 'ro', markersize=4)

#############
# Blue noise
# each octave gains 3 dB
fator = 10. ** (3 / 20.)

# aux prediction
X_adv_agg_filt = aggregate_features(dnn_model, X_adv_filt, which_layers)
pred = np.argmax(np.bincount(np.array(aux_model.predict(X_adv_agg_filt),
                                      dtype='int')))
if pred == t:
    aux_file_filt.write('{}\t'.format('x'))
else:
    aux_file_filt.write('{}\t'.format('o'))

# SAVE ADVERSARY FILES
out_file = os.path.join(args.out_path,
                        '{fname}.{label}.adversary.{snr}dB.wav'.format(
                            fname=fname, label=label_list[t], snr=int(out_snr + .5)))
audiolab.wavwrite(x_adv, out_file, fs, fmt)

out_file2 = os.path.join(args.out_path,
                         '{fname}.{label}.adversary.filtered.wav'.format(
                             fname=fname, label=label_list[t]))
audiolab.wavwrite(x_filt, out_file2, fs, fmt)

dnn_file.write('\n')
dnn_file_filt.write('\n')
aux_file.write('\n')
aux_file_filt.write('\n')

dnn_file.close()
dnn_file_filt.close()
aux_file.close()

print 'Amount: {amount} speakers; Relative Duration: {duration}; Conversation Style: {style}'.format(
    amount=amount, duration=duration, style=freq.keys()[0])

# Generating the conversation
current_size = 0
fs = None
current_data = np.array([])
change_points = []
while current_size < duration:
    for speaker_id, speaker_utt_ids in utterances_ids.iteritems():
        chosen_utt = random.choice(speaker_utt_ids)
        speaker_samples = audio_dataset[speaker_id].get('speaker_dataset')
        current_data = np.concatenate(
            (current_data, speaker_samples[chosen_utt].get('raw_data')), axis=0)
        current_size += speaker_samples[chosen_utt].get('length(secs)')
        change_points.append(len(current_data))
        fs = speaker_samples[chosen_utt].get('sample_rate')

if len(change_points):
    change_points.pop()

if len(change_points) > 1:
    conversations_dataset = {'id': conversation_id,
                             'speakers_utterances': utterances_ids,
                             'speakers_turns': change_points,
                             'audio_data': current_data,
                             'length(secs)': current_size,
                             'sample_rate': fs,
                             'amount_speakers': amount,
                             'conversation_style': freq.keys()[0]}
    audlib.wavwrite(np.array(current_data), '{0}.wav'.format(conversation_id), fs)
    conversation_id += 1
    with open('ConversationDataSet.pickle', 'ab') as f:
        pickle.dump(conversations_dataset, f, -1)

def writeaudio(data, filename='test_in.wav'):
    audiolab.wavwrite(data, filename, 48000, 'float32')

X_adv_agg = aggregate_features(dnn_model, X_adv, which_layers)
p3 = np.argmax(np.bincount(np.array(aux_model.predict(X_adv_agg), dtype='int')))
print 'Predicted label on adversarial example (classifier trained on aggregated features from last layer of dnn): ', p3

if args.out_path:
    out_snr = 20 * np.log10(np.linalg.norm(x[nfft:-nfft]) /
                            np.linalg.norm(x[nfft:-nfft] - x_adv[nfft:-nfft]))
    label_list = ['blues', 'classical', 'country', 'disco', 'hiphop',
                  'jazz', 'metal', 'pop', 'reggae', 'rock']
    out_label1 = label_list[p2]
    out_file1 = os.path.join(args.out_path, 'dnn',
                             '{fname}.{label}_adversary.{snr}dB.dnn.wav'.format(
                                 fname=os.path.splitext(os.path.split(args.test_file)[-1])[0],
                                 label=out_label1, snr=int(out_snr + .5)))
    audiolab.wavwrite(x_adv, out_file1, fs, 'pcm16')
    if args.aux_model:
        out_label2 = label_list[p3]
        out_file2 = os.path.join(args.out_path, 'rf',
                                 '{fname}.{label}_adversary.{snr}dB.rf.wav'.format(
                                     fname=os.path.splitext(os.path.split(args.test_file)[-1])[0],
                                     label=out_label2, snr=int(out_snr + .5)))
        audiolab.wavwrite(x_adv, out_file2, fs, 'pcm16')

if 0:
    ## Time-domain waveforms
    ## ------------------------------------------------------------------------
    plt.ion()
    N = 512
    sup = np.arange(N)