import numpy as np
from keras.models import load_model

# make_stft, make_wav, seq_len, n_bins and the TimeFreqMasking layer are
# assumed to be defined elsewhere in this module.
def separate(track, model='tempmodel.h5'):
    # STFT magnitude and phase for the left and right channels of the mixture.
    testcaseL, angleL = make_stft([track.audio[:, 0]])
    testcaseR, angleR = make_stft([track.audio[:, 1]])

    stft_estimates = {'musicL': [], 'vocalL': [], 'musicR': [], 'vocalR': []}

    testmodel = load_model(model,
                           custom_objects={'TimeFreqMasking': TimeFreqMasking})

    # The network has two outputs: masked music and vocal spectrograms.
    stft_estimates['musicL'], stft_estimates['vocalL'] = testmodel.predict(
        testcaseL.reshape(-1, seq_len, n_bins))
    stft_estimates['musicR'], stft_estimates['vocalR'] = testmodel.predict(
        testcaseR.reshape(-1, seq_len, n_bins))

    for key in stft_estimates:
        # 88 is the training sequence length (seq_len).
        stft_estimates[key] = np.array(stft_estimates[key]).reshape(-1, 88, n_bins)

    # Invert the estimated magnitudes using the mixture phase, per channel.
    vocals = np.stack([make_wav(stft_estimates['vocalL'], angleL),
                       make_wav(stft_estimates['vocalR'], angleR)], axis=1)
    music = np.stack([make_wav(stft_estimates['musicL'], angleL),
                      make_wav(stft_estimates['musicR'], angleR)], axis=1)

    # Zero-pad the estimates back to the length of the input track.
    return {
        'vocals': np.pad(vocals,
                         ((0, np.abs(track.audio.shape[0] - vocals.shape[0])), (0, 0)),
                         'constant'),
        'accompaniment': np.pad(music,
                                ((0, np.abs(track.audio.shape[0] - music.shape[0])), (0, 0)),
                                'constant'),
    }
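# Minimal usage sketch (an assumption, not part of this file): the
# dict-of-estimates signature above matches the musdb 0.2 evaluation API,
# where DB.run() applies the separation function to every track and writes
# the returned estimates to disk. The dataset path is a placeholder.
import musdb

mus = musdb.DB(root_dir='path/to/musdb18')
mus.run(separate, estimates_dir='./estimates')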
import numpy
import theano
from theano import tensor
from cPickle import load  # Python 2, matching the print-statement syntax below

# config, nn_fprop, get_seed, rescale and make_wav are assumed to come from
# this repo's config/utils modules.
locals().update(config)  # hoist config entries into scope (locals() is globals() at module level)

# Set up model and prediction function
x = tensor.tensor3('inputs', dtype='float64')
y = tensor.tensor3('targets', dtype='float64')
with open('gru_best.pkl', 'rb') as picklefile:  # binary mode for pickled weights
    model = load(picklefile)
y_hat, cost, cells = nn_fprop(x, y, frame_length, hidden_size, num_layers, model)
predict_fn = theano.function([x], y_hat)

# Generate
print "generating audio..."
seed = get_seed(hdf5_file, [seed_index])
sec = 16000  # sample rate (Hz)
samples_to_generate = sec * secs_to_generate
num_frames_to_generate = samples_to_generate // frame_length + seq_length  # seed frames are trimmed below
predictions = []
prev_input = seed
for i in range(num_frames_to_generate):
    prediction = predict_fn(prev_input)
    predictions.append(prediction)
    # Rescale the next input with the running min/max so values stay in range.
    pred_min = numpy.min(predictions)
    pred_max = numpy.max(predictions)
    prev_input = rescale(prediction, pred_max, pred_min)
actually_generated = numpy.asarray(predictions)[seq_length:, :, :, :]  # cut off the seed
last_frames = actually_generated[:, :, -1, :]  # last frame per sequence (currently unused)
make_wav(output_filename, actually_generated.flatten())
print str(secs_to_generate) + ' seconds of audio generated'
print output_filename
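# Sketch of the rescale() helper called above (an assumption, not the repo's
# definition): min-max normalise the prediction back into [0, 1] before it is
# fed in as the next input. Parameter order follows the call site.
def rescale(frames, frame_max, frame_min):
    span = frame_max - frame_min
    if span == 0:
        return frames - frame_min  # constant signal: avoid division by zero
    return (frames - frame_min) / span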
# Fragment: assumes the Blocks imports (MainLoop, Model, saveload,
# SharedVariableModifier, Timing, Printing, FinishAfter) and the monitors,
# step rules and data streams defined earlier in this script.
extensions = [dev_monitor, train_monitor, Timing(),
              Printing(after_batch=True),
              FinishAfter(after_n_epochs=num_epochs),
              saveload.Load(load_path),
              plotter,
              saveload.Checkpoint(last_path, save_separately=['log']),
              ] + track_best('dev_cost', save_path)

if learning_rate_decay not in (0, 1):
    extensions.append(SharedVariableModifier(
        step_rules[0].learning_rate,
        lambda n, lr: numpy.cast[theano.config.floatX](learning_rate_decay * lr),
        after_epoch=True, after_batch=False))

print 'number of parameters in the model: ' + str(tensor.sum([p.size for p in cg.parameters]).eval())

# Finally build the main loop and train the model
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                     model=Model(cost), extensions=extensions)
main_loop.run()

# Generate
print "generating audio..."
seed = get_seed(hdf5_file, [400])
seed_influence_length = frame_length * 3  # samples still influenced by the seed
sec = 16000  # sample rate (Hz)
samples_to_generate = sec * secs_to_generate
# Generate enough extra frames to cover the seed-influenced samples trimmed below.
num_frames_to_generate = samples_to_generate // frame_length + seed_influence_length // frame_length
generated_seq = []
prev_input = seed
for x in range(num_frames_to_generate):
    prediction = predict_fn(prev_input)
    generated_seq.append(prediction)  # NEED TO OVERLAP/AVG?
    prev_input = prediction
actually_generated = numpy.asarray(generated_seq).flatten()[seed_influence_length:]
filename = str(frame_length) + str(seq_length) + '.wav'
make_wav(filename, actually_generated)
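# Sketch of the track_best() helper used above (an assumption following the
# standard Blocks recipe, not necessarily this repo's exact definition):
# track the best value of a log channel and checkpoint whenever it improves.
from blocks.extensions.training import TrackTheBest
from blocks.extensions.predicates import OnLogRecord
from blocks.extensions.saveload import Checkpoint

def track_best(channel, save_path):
    tracker = TrackTheBest(channel, choose_best=min)
    checkpoint = Checkpoint(save_path, after_training=False)
    # Fire only when the tracked channel reaches a new best value.
    checkpoint.add_condition(['after_epoch'],
                             predicate=OnLogRecord(channel + '_best_so_far'))
    return [tracker, checkpoint]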
# To start fresh instead of resuming from a checkpoint, use
# saveload.Load(load_path); to resume, use saveload.Load(last_path, load_log=True).
extensions = [dev_monitor, train_monitor, Timing(),
              Printing(after_batch=True),
              FinishAfter(after_n_epochs=num_epochs),
              saveload.Load(load_path),
              plotter,
              saveload.Checkpoint(last_path, save_separately=['log']),
              ] + track_best('dev_cost', save_path)

if learning_rate_decay not in (0, 1):
    extensions.append(SharedVariableModifier(
        step_rules[0].learning_rate,
        lambda n, lr: numpy.cast[theano.config.floatX](learning_rate_decay * lr),
        after_epoch=True, after_batch=False))

print 'number of parameters in the model: ' + str(tensor.sum([p.size for p in cg.parameters]).eval())

# Finally build the main loop and train the model
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                     model=Model(cost), extensions=extensions)
main_loop.run()

# Generate
print "generating audio..."
seed = get_seed(hdf5_file, [400])
generated_seq = []
prev_input = seed
for x in range(len_to_generate):
    prediction = predict_fn(prev_input)
    generated_seq.append(prediction.flatten())
    # TODO: overlap/average consecutive predictions instead of concatenating,
    # e.g. generated_seq.append((prev_input[1:] + prediction[:-1]) / 2)
    # or keep only the newest frame: generated_seq.append(prediction[-1])
    prev_input = prediction
make_wav(output_filename, numpy.asarray(generated_seq).flatten())
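# Sketch of the overlap/averaging fix flagged in the TODO above, following the
# commented-out alternative (prev_input[1:] + prediction[:-1]) / 2: each
# prediction is the previous input advanced by one frame, so the overlapping
# frames can be averaged to smooth the seam between successive steps.
def overlap_average(prev_input, prediction):
    # prediction[:-1] covers the same frames as prev_input[1:]; their mean
    # replaces the raw prediction for those positions.
    return (prev_input[1:] + prediction[:-1]) / 2.0

# In the generation loop this would replace the raw append:
#     generated_seq.append(overlap_average(prev_input, prediction).flatten())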