def write_clip(stimulus, stimtime, output_dir, num_channels, sample_rate, data):
    """Write the first channel of an interleaved clip to a 16-bit PCM WAV file.

    The samples go to
    ``<output_dir>/<secs>.<nsecs>_<stimulus>_Channel-1.wav`` (mono).  When
    ``stimulus == 'sound'`` a second file ``..._All.wav`` with
    ``num_channels`` channels is also created; no frames are written to it
    yet (the combined output still "needs tweaking to work right").

    Args:
        stimulus: Label used in the file name (string).
        stimtime: Object exposing ``secs`` and ``nsecs`` attributes.
        output_dir: Directory in which the files are created.
        num_channels: Number of interleaved channels in every chunk.
        sample_rate: Sample rate handed to the sound files.
        data: Iterable of flat, channel-interleaved sample sequences; each
            length must be a multiple of ``num_channels``.
    """
    wav_format = audio.Format(type='wav', encoding='pcm16')
    prefix = (output_dir + '/' + str(stimtime.secs) + '.' +
              str(stimtime.nsecs) + '_' + stimulus)

    # Every handle opened here is closed in the ``finally`` below, so the
    # data is flushed and no descriptor leaks even if a write fails.
    opened = []
    try:
        # Single File output -- Needs tweaking to work right
        if stimulus == 'sound':
            opened.append(audio.Sndfile(prefix + '_All.wav', 'w', wav_format,
                                        num_channels, sample_rate))

        sound_file = audio.Sndfile(prefix + '_Channel-1.wav', 'w',
                                   wav_format, 1, sample_rate)
        opened.append(sound_file)

        for line in data:
            samples = array(line)
            # Floor division keeps the frame count an integer regardless of
            # the division semantics in effect.
            frames = reshape(samples,
                             (len(samples) // num_channels, num_channels))
            # Keep the column dimension: write_frames gets (n_frames, 1).
            sound_file.write_frames(frames[:, :1])
    finally:
        for handle in opened:
            handle.close()
def write(self, filename, encoding="pcm16", endianness="file", fileformat=None):
    """Write ``self.samples`` to ``filename`` using audiolab.

    Args:
        filename: Destination path.
        encoding: Sample encoding passed to ``AL.Format``.
        endianness: Endianness passed to ``AL.Format``.
        fileformat: Container format name.  When empty or ``None`` it is
            taken from the text after the last ``.`` of the file's base name.
    """
    if not fileformat:
        fileformat = os.path.basename(filename).split(".")[-1]
    sound_format = AL.Format(fileformat, encoding, endianness)
    sound_file = AL.Sndfile(filename, "w", sound_format,
                            self.channels, self.samplerate)
    try:
        sound_file.write_frames(self.samples)
    finally:
        # Close even when the write fails so the handle is not leaked.
        sound_file.close()
def to_sound_file(self, c, filepath):
    """Write the frames produced by ``self.to_numpy(c)`` to an AIFF file.

    Args:
        c: Forwarded unchanged to ``self.to_numpy``.
        filepath: Destination path of the AIFF file.
    """
    # Build the frames first: if the conversion fails, no empty output file
    # is left behind.
    frames = self.to_numpy(c)
    aiff_format = audiolab.Format(type='aiff', encoding=self.encoding)
    output_file = audiolab.Sndfile(filepath, 'w', aiff_format,
                                   self.channels, self.samplerate)
    try:
        output_file.write_frames(frames)
    finally:
        output_file.close()
def write_file(path, frames, rate=SAMPLE_RATE):
    """Write ``frames`` to ``path`` as a 16-bit PCM WAV file.

    Args:
        path: Destination file path.
        frames: NumPy array of samples, either 1-D (mono) or 2-D with shape
            ``(n_frames, n_channels)``.
        rate: Sample rate of the file; defaults to ``SAMPLE_RATE``.
    """
    # The channel count is the size of the second axis, not the array rank:
    # a (n, 1) or (n, 4) array must not be declared as 2 channels.
    channels = 1 if len(frames.shape) == 1 else frames.shape[1]
    wav_format = audiolab.Format("wav", "pcm16")
    sndfile = audiolab.Sndfile(path, "w", format=wav_format,
                               channels=channels, samplerate=rate)
    try:
        sndfile.write_frames(frames)
    finally:
        # The original referenced ``sndfile.sync`` without calling it, so the
        # data was never explicitly flushed; close() flushes and releases.
        sndfile.close()
def write_clip(stimulus, stimtime, dirname, num_channels, sample_rate, data):
    """Write an interleaved multi-channel clip to a float32 AU file.

    The file is ``<dirname>/<secs>.<nsecs>_<stimulus>.au``; its name is
    printed before writing starts.

    Args:
        stimulus: Label used in the file name (string).
        stimtime: Object exposing ``secs`` and ``nsecs`` attributes.
        dirname: Directory in which the file is created.
        num_channels: Number of interleaved channels in every chunk.
        sample_rate: Sample rate handed to the sound file.
        data: Iterable of flat, channel-interleaved sample sequences; each
            length must be a multiple of ``num_channels``.
    """
    au_format = audio.Format(type='au', encoding='float32')
    name = (dirname + '/' + str(stimtime.secs) + '.' + str(stimtime.nsecs) +
            '_' + stimulus + '.au')
    print(name)
    soundfile = audio.Sndfile(name, 'w', au_format, num_channels, sample_rate)
    try:
        for chunk in data:
            samples = array(chunk)
            # Floor division keeps the frame count an integer.
            frame_count = len(samples) // num_channels
            soundfile.write_frames(
                reshape(samples, (frame_count, num_channels)))
    finally:
        # close() flushes and releases the handle (sync() only flushed).
        soundfile.close()
def gen(chain, filepath):
    """Generate 15 seconds of audio from a Markov chain and save it as WAV.

    Args:
        chain: Dict holding the settings ``"chain_depth"``, ``"bin_size"``
            and ``"samplerate"`` plus one transition table per bucket index.
            Each table maps a candidate sample value to its weight and keeps
            the total weight under ``"sum"``.
        filepath: Destination of the mono WAV file.

    The file is written even when generation is interrupted or fails; the
    part that was not generated is silence.
    """
    chain_depth = chain["chain_depth"]
    bin_size = chain["bin_size"]
    samplerate = chain["samplerate"]
    # Keys that hold settings rather than transition weights.
    reserved_keys = ("sum", "samplerate", "bin_size", "chain_depth")

    # The total number of buckets, including one special bucket for before
    # the start of the audio.
    num_buckets = 2 / bin_size + 1

    # Reset the bucket index to its starting value: a history made up
    # entirely of the "before the start" bucket.
    bucket_index = 0
    for _ in range(chain_depth):
        bucket_index = bucket_index * num_buckets + num_buckets - 1

    # 15 seconds of sound.  Zero-initialised so that an interrupted run
    # saves silence instead of uninitialised memory.
    new_data = np.zeros([samplerate * 15])
    try:
        for i in range(new_data.size):
            if i % (5 * samplerate) == 0:  # Print progress every 5 seconds
                print(i / samplerate)
            tc = chain[bucket_index]
            new_datum = 0
            # Weighted random choice among the candidate sample values.
            weighted_index = random.randrange(tc["sum"])
            for k in tc.keys():
                if k in reserved_keys:
                    continue
                if weighted_index < tc[k]:
                    new_datum = k
                    break
                weighted_index -= tc[k]
            new_data[i] = new_datum
            # Compute the bucket_index for the next iteration
            val = ((new_data[i] - new_data[i] % bin_size) / bin_size +
                   1 / bin_size)
            # NOTE(review): prints once per generated sample; looks like
            # leftover debugging output -- confirm before removing.
            print(val)
            bucket_index = int((bucket_index * num_buckets + val) %
                               (num_buckets ** chain_depth))
    finally:
        wav_format = audiolab.Format('wav')
        f3 = audiolab.Sndfile(filepath, 'w', wav_format, 1, samplerate)
        try:
            f3.write_frames(new_data)
        finally:
            f3.close()
# Restore previously saved weights, then generate `output_n_frames` values
# with the model and save them as a mono WAV file.
model.load_weights("test2.hdf")

# Output buffer; the first `seq_len` entries are copied from `data`.
frames = np.empty(output_n_frames, dtype=int)
frames[0:seq_len] = data[0:seq_len]
max_i = output_n_frames - seq_len
for i in range(max_i):
    # Progress indicator rewritten in place via the carriage return.
    sys.stdout.write("Status %d/%d \r" % (i + 1, max_i))
    sys.stdout.flush()
    # Input window of length seq_len: the first `data_frames_to_use` values
    # come from `data`, the remainder from values already in `frames`.
    seed = np.empty(seq_len, dtype=int)
    seed[0:data_frames_to_use] = data[i:i + data_frames_to_use]
    seed[data_frames_to_use:seq_len] = frames[i + data_frames_to_use:i + seq_len]
    # Batch of one window; the predicted class becomes the next frame.
    frames[i + seq_len] = model.predict_classes(np.array([seed]), batch_size=1, verbose=0)
sys.stdout.write("Status Saving file... \r")
# Linear rescale: class 0 maps to -1.0 and class `buckets` would map to 1.0.
frames = frames / (buckets / 2.0) - 1.0
print frames
f2 = audiolab.Sndfile(output_filename, 'w', audiolab.Format('wav'), 1, sampling_rate)
f2.write_frames(frames)
f2.close()
sys.stdout.write("Done! \n")
import numpy as np
import scikits.audiolab as audiolab
import random

# data, sampling_rate, encoding = aiffread('Woodstock.aif')
# Open the source recording and remember its properties.
f = audiolab.Sndfile('05 Woodstock.aif', 'r')
sampling_rate = f.samplerate
channels = f.channels
encoding = f.encoding
#format = f.format
# NOTE(review): `format` shadows the builtin of the same name.
format = audiolab.Format('wav')
# Read the whole file, then keep only the frames from second 30 up to
# second 45.
data = f.read_frames(f.nframes)
data = data[30 * f.samplerate:45 * f.samplerate]
# Save the excerpt as a WAV copy with the source's channel count and rate.
f2 = audiolab.Sndfile('copy.wav', 'w', format, channels, f.samplerate)
f2.write_frames(data)
f2.close()
# NOTE(review): `f` is not closed in the code visible here.
print data
#data = [[1, 0], [2, 0], [3, 0], [2, 0], [1, 0]]
# Settings used further on -- presumably for building a Markov chain over
# the samples; confirm against the rest of the file.
chain_depth = 2
bin_size = 0.00001
# Run every chunk through the tuner and collect the processed samples until
# reading the next chunk fails.
while datas != '':
    print(".")
    Signal = datas.data[:]
    rawfromC = AutoTune.Tuner(Signal, FS, CHUNK, SCALE_ROTATE, SCALE_ROTATE,
                              LFO_QUANT, CONCERT_A, FIXED_PITCH, FIXED_PULL,
                              CORR_STR, CORR_SMOOTH, PITCH_SHIFT, LFO_DEPTH,
                              LFO_RATE, LFO_SHAPE, LFO_SYMM, FORM_WARP, MIX,
                              KEY)
    NewSignal.extend(rawfromC)
    try:
        datas = f.read_frames(CHUNK, dtype=numpy.float32)
    except Exception:
        # Best effort: a failed read is treated as end of input.  Unlike a
        # bare ``except`` this still lets Ctrl-C / SystemExit propagate.
        break

array = numpy.array(NewSignal)
fmt = audiolab.Format('wav', 'pcm32')
# making the file .wav
afile = audiolab.Sndfile(OUT, 'w', fmt, nchannels, FS)
try:
    # writing in the file
    afile.write_frames(array)
finally:
    # Closing flushes the samples to disk; the handle was previously left
    # open.
    afile.close()
print("Done!")
import scikits.audiolab as audio
import matplotlib.pyplot as plt
import random


def frequencyFilter(signal):
    """Filter a spectrum in place.

    Every bin whose index lies strictly between ``starting_freq`` and
    ``ending_freq`` is set to 0; every other bin is doubled.

    Args:
        signal: Mutable, indexable sequence of frequency bins (modified in
            place).
    """
    print("Len signal: %d" % len(signal))
    starting_freq = 0
    ending_freq = 190000  #len(signal)
    for i in range(0, len(signal)):
        if starting_freq < i < ending_freq:
            signal[i] = 0
        else:
            signal[i] *= 2


def processWithNumpy(signal):
    """Return ``signal`` after FFT, ``frequencyFilter`` and inverse FFT.

    Args:
        signal: Time-domain samples accepted by ``numpy.fft.fft``.

    Returns:
        float64 NumPy array holding the real part of the filtered signal.
    """
    transformedSignal = numpy.fft.fft(signal)
    frequencyFilter(transformedSignal)
    cleanedSignal = numpy.fft.ifft(transformedSignal)
    # Take the real part explicitly: the implicit complex -> float64 cast
    # produced the same values but emitted a ComplexWarning.
    return numpy.array(cleanedSignal.real, dtype=numpy.float64)


# Must be wav files.
infile = sys.argv[1]
outfile = sys.argv[2]
(inputSignal, samplingRate, bits) = audio.wavread(infile)
outputSignal = processWithNumpy(inputSignal)
outputFile = audio.Sndfile(outfile, 'w', audio.Format('wav'), 1, samplingRate)
try:
    outputFile.write_frames(outputSignal)
finally:
    # Release the handle even when writing fails.
    outputFile.close()
hprev = np.zeros((hidden_size, 1)) # reset RNN memory p = 0 # go from start of data inputs = [char_to_ix[ch] for ch in data[p:p + seq_length]] targets = [char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]] # sample from the model now and then if n % 5000 == 0: print 'sampling model' sample_ix = sample(hprev, inputs[0], 200) # txt = ''.join(str(ix_to_char[ix]) for ix in sample_ix) # print '----\n %s \n----' % (txt, ) frames = [] for ix in sample_ix: frames.append(ix_to_char[ix]) output_file = audiolab.Sndfile('output-' + str(n) + '.wav', 'w', audiolab.Format('wav'), 1, sampling_rate) frames = np.array(frames) output_file.write_frames(frames) output_file.close() # forward seq_length characters through the net and fetch gradient loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev) smooth_loss = smooth_loss * 0.999 + loss * 0.001 if n % 100 == 0: print 'iter %d, loss: %f' % (n, smooth_loss) # print progress # perform parameter update with Adagrad for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], [dWxh, dWhh, dWhy, dbh, dby], [mWxh, mWhh, mWhy, mbh, mby]):
# Finish building the model, restore saved weights, then generate
# `output_n_frames` values and save them as a mono WAV file.
model.add(Activation("softmax"))
model.compile(loss="categorical_crossentropy", optimizer="rmsprop")
model.load_weights("test3.hdf")

# Output buffer; the first `seq_len` entries are copied from `data`.
frames = np.empty(output_n_frames, dtype=int)
frames[0:seq_len] = data[0:seq_len]
max_i = output_n_frames - seq_len
for i in range(max_i):
    # Progress indicator rewritten in place via the carriage return.
    sys.stdout.write("Status %d/%d \r" % (i + 1, max_i))
    sys.stdout.flush()
    # Each prediction is made from a window of the original `data`, not from
    # the frames generated so far.
    frames[i + seq_len] = model.predict_classes(np.array([data[i:i + seq_len] ]), batch_size=1, verbose=0)
sys.stdout.write("Status Saving file... \r")
# Linear rescale: class 0 maps to -1.0 and class `buckets` would map to 1.0.
frames = frames / (buckets / 2.0) - 1.0
print frames
f2 = audiolab.Sndfile('test4b.wav', 'w', audiolab.Format('wav'), 1, sampling_rate)
f2.write_frames(frames)
f2.close()
sys.stdout.write("Done! \n")
mn = this mnpos = x[i] lookformax = False else: if this > mn + delta: mintab.append((mnpos, mn)) mx = this mxpos = x[i] lookformax = True return array(maxtab), array(mintab) d = n * 10 / 10000 * np.mean(abs(F[:, 0])) #El parametro 'd' sirve para elegir los maximos que tengan una diferencia de almenos 'd' unidades respecto # a los vecinos mas cercanos. # Elijo d = n*15./10e4 esperando obtener del orden de 15 maximos por cada 10000 datos en la señal maxtab, mintab = peakdet(abs(F[:, 0]), d) for i in maxtab[:, 0]: F[i, :] = F[i, :] * 2 #Aumentar los picos seleccionados F = F / 2.0 #Disminuir todos los picos # Volver al dominio del tiempo n_audio = ifft(F) output_file = audio.Sndfile("nuevo.wav", 'w', audio.Format('wav'), 2, sampling_rate) output_file.write_frames(n_audio.real) output_file.close() print('\n\nPrograma Finalizado\n')
def stretch_audio(r, fname=fname):
    """Play a pickled clip chunk by chunk, tempo-stretched to follow ``r``.

    For every chunk the current beat interval is read from ``r.value``, the
    chunk is written to ``temp.wav``, the external ``soundstretch`` tool
    converts it to ``output.wav`` and the result is queued on mixer
    channel 0.

    Args:
        r: Object whose ``value`` attribute gives the time between beats
            (used as ``60000 / value``, so presumably milliseconds --
            confirm against the producer of ``r``).
        fname: Base name of the clip; ``<ddir>/<fname>.pkl`` is loaded.
    """
    curr_sample = 0
    mixer.init(f * multiplier)
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only load clips from a trusted source.  The mode is left as 'r' as in
    # the original -- confirm whether 'rb' is needed on the target platform.
    with open('%s/%s.pkl' % (ddir, fname), 'r') as clip_file:
        a_full = pickle.load(clip_file)
    c = mixer.Channel(0)
    c.set_volume(0.8)

    base_bpm = get_base_bpm(fname)
    if base_bpm > 80:
        base_bpm = base_bpm / 2
    print('base_bpm: %f' % base_bpm)

    # plays first few seconds of audio clip
    starttime = time.time()
    secondsin = 0
    cmd = '%s temp.wav output.wav -tempo=%d'
    bpm = 0

    # One shared devnull handle for all child processes; the original opened
    # two new handles per chunk and never closed them.
    with open(os.devnull, 'w') as devnull:
        while curr_sample < a_full.shape[0] - f * multiplier:
            # get next chunk of wav
            time_btwn_beats = int(r.value)
            # Smooth the tempo: one third previous value, two thirds current
            # measurement, never below three quarters of the base tempo.
            bpm = bpm / 3 + 60000 / time_btwn_beats * 2 / 3
            bpm = max(base_bpm * 3 / 4, bpm)
            rate = bpm / base_bpm
            (a, curr_sample) = getSamples(a_full, curr_sample, dur=rate)

            # write chunk to temp.wav
            out = al.Sndfile('temp.wav', 'w', al.Format(), 2, f)
            try:
                out.write_frames(a)
            finally:
                out.close()

            # fork a process to change tempo of chunk
            tempo = 100 * (rate - 1)
            pargs = shlex.split(cmd % (soundstretch, tempo))
            p = sp.Popen(pargs, stdout=devnull, stderr=devnull)
            p.wait()

            # wait to queue it
            print("Sec: %d, TBB: %d, Rate: %f, Tempo: %d" % (
                secondsin, time_btwn_beats, rate, tempo))
            while time.time() - starttime < secondsin - 0.2:
                time.sleep(0.05)

            # queue it
            # TODO: this is super choppy :(
            c.queue(mixer.Sound('output.wav'))
            secondsin += 1
from scikits import audiolab
import numpy

# Cut the input into Hamming-windowed blocks, shuffle the blocks, and write
# them back with a half-window overlap between neighbouring blocks.
sin_a3 = audiolab.Sndfile('saw_a1.aiff')
print(sin_a3.nframes)

window_size = 4096  #256
# Floor division keeps the counts and slice indices integers.
half_window = window_size // 2
num_windows = sin_a3.nframes // window_size  # Truncated int
#shuffled_indices = numpy.random.permutation(num_windows)
window = numpy.hamming(window_size)

file_data = [
    window * sin_a3.read_frames(window_size) for _ in range(num_windows)
]
output_data = numpy.random.permutation(file_data)

output_file = audiolab.Sndfile(
    'shuffled_windowed_shifted_saw_a1_' + str(window_size) + '.aiff', 'w',
    audiolab.Format(type='aiff', encoding=sin_a3.encoding),
    sin_a3.channels, sin_a3.samplerate)
try:
    # Overlap-add: the second half of each block plus the first half of the
    # next one.
    for i in range(num_windows - 1):
        output_file.write_frames(output_data[i][half_window:] +
                                 output_data[i + 1][:half_window])
finally:
    # close() flushes and releases the handle (sync() only flushed).
    output_file.close()
import os

# Convert the wav file `File` to 16 kHz mono FLAC in a temporary file and
# send it to the Google speech API.
cd, FileNameTmp = mkstemp('TmpSpeechFile.flac')
# mkstemp hands back an already-open OS-level descriptor; the file is
# reopened by name below, so release the descriptor right away.
os.close(cd)

#Frame Rate used by api speech from google
fr = 16000.

#using audiolab to read wav file
Signal, fs = audiolab.wavread(File)[:2]

#changing the original sample rate to 16000fs fast mode
# (scipy.signal.resample would also work but is a bit slow)
Signal = resample(Signal, fr / float(fs), 'sinc_best')

# file Format type
fmt = audiolab.Format('flac', 'pcm16')
nchannels = 1

# making the file .flac
afile = audiolab.Sndfile(FileNameTmp, 'w', fmt, nchannels, fr)
try:
    #writing in the file
    afile.write_frames(Signal)
finally:
    # The file must be closed before it is read back, otherwise the FLAC
    # data may not be completely on disk.
    afile.close()

#Sending to google the file .flac
url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&lang=pt-BR"
with open(FileNameTmp, "rb") as flac_file:
    flac = flac_file.read()
header = {'Content-Type' : 'audio/x-flac; rate=16000'}
req = urllib2.Request(url, flac, header)
data = urllib2.urlopen(req)
print(data.read())
def write_audio_file(filename, filedata, sample_rate):
    """Save ``filedata`` to ``filename`` as a single-channel WAV file.

    Args:
        filename: Destination path.
        filedata: Frames handed to ``Sndfile.write_frames``.
        sample_rate: Sample rate of the output file.
    """
    wav_format = audio.Format('wav')
    sink = audio.Sndfile(filename, 'w', wav_format, 1, sample_rate)
    sink.write_frames(filedata)
    sink.close()
# Generate `output_n_frames` values with the model, seeded from the first
# loud enough position in `data`, and save them as a mono WAV file.
frames = np.empty(output_n_frames, dtype=int)

# Start searching 30 seconds in and advance until a value of at least 1500
# is found.
# NOTE(review): there is no upper bound on the search; if no such value
# exists the index presumably runs past the end of `data` -- confirm.
start_frame = 30 * sampling_rate
while data[start_frame] < 1500:
    start_frame += 1
    # Report progress once per second of audio scanned.
    if start_frame % sampling_rate == 0:
        print start_frame
print "found start frame " + str(start_frame)

# Seed the output with seq_len values beginning at the start frame.
frames[0:seq_len] = data[start_frame:start_frame + seq_len]
print frames[0:seq_len]
max_i = output_n_frames-seq_len
for i in range(max_i):
    # Progress indicator rewritten in place via the carriage return.
    sys.stdout.write("Status %d/%d \r" % (i+1,max_i))
    sys.stdout.flush()
    # Each prediction is made from the previous seq_len values in `frames`,
    # so generated values feed back into later predictions.
    frames[i+seq_len] = model.predict_classes(np.array([frames[i:i+seq_len]]), batch_size=1, verbose=0)
sys.stdout.write("Status Saving file... \r")
# Linear rescale: class 0 maps to -1.0 and class `buckets` would map to 1.0.
frames = frames/(buckets/2.0) - 1.0
print frames[0:seq_len * 2]
f2 = audiolab.Sndfile('test.wav', 'w', audiolab.Format('wav'), 1, sampling_rate)
f2.write_frames(frames)
f2.close()
sys.stdout.write("Done! \n")
def processWithOurFFT(signal):
    """Filter ``signal`` using the project's own ``fft``/``ifft`` helpers.

    The signal is passed through ``pad`` and ``fft``, filtered in place by
    ``frequencyFilter``, transformed back with ``ifft`` and returned as a
    float64 NumPy array.
    """
    # this one is significantly slower, but not unreasonable
    transformedSignal = numpy.array(fft(pad(signal)))
    frequencyFilter(transformedSignal)
    cleanedSignal = ifft(transformedSignal)
    # NOTE(review): if `ifft` returns complex values this cast drops (or
    # rejects) the imaginary part -- confirm what `ifft` returns.
    return numpy.array(cleanedSignal, dtype=numpy.float64)

# put this code in one of the two functions above to graph the transformed signal
# plt.plot([norm(x) for x in transformedSignal])
# plt.show() # blocks the program execution until the window is closed

# Read the input, halve every sample and add uniform random noise in
# [0, 0.1), then save the noisy version.
(inputSignal, samplingRate, bits) = audio.wavread('no_tree_ent.wav')
inputSignal = numpy.array(
    [x / 2.0 + random.random() * 0.1 for x in inputSignal])
noisyFile = audio.Sndfile('no_tree_noisy.wav', 'w', audio.Format('wav'), 1, samplingRate)
noisyFile.write_frames(inputSignal)
noisyFile.close()

# Filter the noisy signal and save the result.
outputSignal = processWithNumpy(inputSignal)
outputFile = audio.Sndfile('no_tree_transformed.wav', 'w', audio.Format('wav'), 1, samplingRate)
outputFile.write_frames(outputSignal)
outputFile.close()
def run():
    """Separate a test recording into per-instrument WAV files.

    Reads ``<SCRIPT_PATH>\\..\\..\\test\\<test_name>.wav`` in chunks of at
    most ``CHUNK_SIZE`` frames, passes every chunk to an
    ``AdaptiveSourceSeparator`` built from the pickled templates at
    ``MODEL_PATH``, writes one mono WAV file per instrument and saves the
    per-chunk divergence values to a text file.
    """
    # NOTE: Replace this with the name of the test example to perform SS on
    test_name = "grace_short"
    wf = skal.Sndfile("%s\\..\\..\\test\\%s.wav" % (SCRIPT_PATH, test_name), "r")
    p = pa.PyAudio()
    # NOTE(review): `width` is not used anywhere below; the int() conversion
    # presumably fails for encodings that do not end in two digits -- confirm.
    width = int(wf.encoding[-2:])/8
    fs = wf.samplerate
    # NOTE: Replace this with the names of the instruments that are in the audio
    user_instr_names = ['piano', 'trumpet']
    # NOTE(review): the stream is opened and later closed, but nothing in
    # this function writes to it.
    stream = p.open(format=p.get_format_from_width(4), channels=1, rate=wf.samplerate, output=True)
    with open(MODEL_PATH, 'rb') as template_file:
        templates = cPickle.load(template_file)

    # Concatenate the template features of all requested instruments and
    # remember which column range belongs to which instrument.
    instr_to_cols = dict()
    cols = []
    idx = 0
    for instr in user_instr_names:
        instr_templates = templates[instr]
        L = 0
        # For now, we're just discarding the pitch info
        for feature, chroma, octave in instr_templates:
            cols.append(feature)
            L += feature.shape[1]
        instr_to_cols[instr] = (idx, idx + L)
        idx += L
    W = np.concatenate(cols, axis=1)

    #encoded_frames = dict(zip(user_instr_names, [[] for _ in range(len(user_instr_names))]))
    # One list of separated chunks per instrument.
    raw_frames = dict(zip(user_instr_names, [[] for _ in range(len(user_instr_names))]))
    divergence = []
    # NOTE: Change this to use the adaptive algorithm or not
    adaptive = False
    threshold = 3
    src_sep = plca_learn.AdaptiveSourceSeparator(W, user_instr_names, instr_to_cols, threshold, fs, CHUNK_SIZE, adaptive)

    # Process the input chunk by chunk until every frame has been read.
    i = 0
    frames_left = wf.nframes
    while frames_left > 0:
        frames_requested = min(CHUNK_SIZE, frames_left)
        data = wf.read_frames(frames_requested, dtype=np.float32)
        processed_frames, div = src_sep.process_segment(data)
        for instr in processed_frames:
            raw_source = processed_frames[instr]
            #encoded_frames[instr].append(encoded_source)
            raw_frames[instr].append(raw_source)
        divergence.append(div)
        i += 1
        frames_left -= frames_requested
        print "========= Frames Completed: %d/%d ==========" % (wf.nframes - frames_left, wf.nframes)
    print "Done processing frames."

    stream.stop_stream()
    stream.close()
    p.terminate()

    # Write the separated chunks of every instrument to its own mono file.
    format = skal.Format('wav')
    mode_desc = "-adapt" if adaptive else "-noadapt"
    for instr, audio_data in raw_frames.items():
        output = skal.Sndfile("%s\\..\\..\\%s-separated-%s%s.wav" % (SCRIPT_PATH, test_name, instr, mode_desc),\
                              'w', format, 1, fs)
        for frame in audio_data:
            output.write_frames(frame)
        output.close()
    wf.close()

    # Save one divergence value per processed chunk.
    with open("%s\\..\\..\\divergence_results-%s%s.txt" % (SCRIPT_PATH, test_name, mode_desc), 'w+') as f:
        f.write("Divergence Results\n")
        f.write("====================\n\n")
        for i, div in enumerate(divergence):
            f.write(" -- Segment %d: %f\n" % (i, div))
(epoch + 1, np_epoch, h + 1, max_h, loss[0])) sys.stdout.flush() train_x = np.empty([batch_size, seq_len], dtype=int) train_y = np.zeros([batch_size, buckets], dtype=int) for i in range(batch_size): train_x[i] = data[batch_size * h + i:batch_size * h + i + seq_len] train_y[i][data[batch_size * h + i + seq_len]] = 1 loss = model.train_on_batch(train_x, train_y) sys.stdout.write("Epoch %d/%d saving weights! \r" % (epoch + 1, np_epoch)) model.save_weights("test_sl%d_ep%d.hdf" % (seq_len, epoch), overwrite=True) sys.stdout.write("Epoch %d/%d done! \r" % (epoch + 1, np_epoch)) #model.load_weights("test.hdf") classes = model.predict_classes(train_x) print classes classes = classes / (buckets / 2.0) - 1.0 print classes f2 = audiolab.Sndfile('output.wav', 'w', audiolab.Format('wav'), 1, len(classes)) f2.write_frames(classes) f2.close()