Example #1
import numpy as np
from keras.models import load_model
# make_stft, make_wav, the custom TimeFreqMasking layer, and the seq_len/n_bins
# config values are assumed to be defined elsewhere in the module.

def separate(track, model='tempmodel.h5'):
	# Magnitude spectrogram and phase for each stereo channel.
	testcaseL, angleL = make_stft([track.audio[:, 0]])
	testcaseR, angleR = make_stft([track.audio[:, 1]])

	testmodel = load_model(model, custom_objects={'TimeFreqMasking': TimeFreqMasking})

	# The model predicts (music, vocal) magnitude estimates per channel.
	stft_estimates = {}
	stft_estimates['musicL'], stft_estimates['vocalL'] = testmodel.predict(testcaseL.reshape(-1, seq_len, n_bins))
	stft_estimates['musicR'], stft_estimates['vocalR'] = testmodel.predict(testcaseR.reshape(-1, seq_len, n_bins))

	# Collapse the batch dimension: 88 time frames per example.
	for key in stft_estimates:
		stft_estimates[key] = np.array(stft_estimates[key]).reshape(-1, 88, n_bins)

	# Resynthesize stereo waveforms from the estimated magnitudes and the mixture phase.
	vocals = np.stack([make_wav(stft_estimates['vocalL'], angleL), make_wav(stft_estimates['vocalR'], angleR)], axis=1)
	music = np.stack([make_wav(stft_estimates['musicL'], angleL), make_wav(stft_estimates['musicR'], angleR)], axis=1)

	# Zero-pad the estimates back to the original track length.
	return {
		'vocals': np.pad(vocals, ((0, np.abs(track.audio.shape[0] - vocals.shape[0])), (0, 0)), 'constant'),
		'accompaniment': np.pad(music, ((0, np.abs(track.audio.shape[0] - music.shape[0])), (0, 0)), 'constant'),
	}
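make_stft and make_wav above come from the surrounding module and are not shown. A minimal sketch of what such a pair could look like, assuming librosa and an n_fft chosen so that n_bins == n_fft // 2 + 1; the names, signatures, and parameters here are hypothetical, not the original implementation:

import librosa
import numpy as np

def make_stft(tracks, n_fft=1024, hop_length=256):
    # Hypothetical: magnitude frames of shape (time, n_bins) plus the phase
    # of the (single) input track, so the mixture phase can be reused later.
    audio = np.asarray(tracks[0], dtype=np.float32)
    spec = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length)
    return np.abs(spec).T, np.angle(spec)

def make_wav(magnitude, angle, hop_length=256):
    # Hypothetical inverse: recombine an estimated magnitude with the
    # mixture phase and invert back to a waveform.
    mag = magnitude.reshape(-1, magnitude.shape[-1]).T  # (n_bins, frames)
    frames = min(mag.shape[1], angle.shape[1])
    spec = mag[:, :frames] * np.exp(1j * angle[:, :frames])
    return librosa.istft(spec, hop_length=hop_length)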
Example #2
locals().update(config)  # hoist config entries (frame_length, hidden_size, ...) into module scope

# Set up model and prediction function
x = tensor.tensor3('inputs', dtype='float64')
y = tensor.tensor3('targets', dtype='float64')

with open('gru_best.pkl', 'rb') as picklefile:  # binary mode for pickle
    model = load(picklefile)
y_hat, cost, cells = nn_fprop(x, y, frame_length, hidden_size, num_layers, model)
predict_fn = theano.function([x], y_hat)

# Generate
print "generating audio..."
seed = get_seed(hdf5_file, [seed_index])
sec = 16000  # sample rate in Hz
samples_to_generate = sec * secs_to_generate
# Generate seq_length extra frames to cover the seed, which is cut off below.
num_frames_to_generate = samples_to_generate // frame_length + seq_length
predictions = []
prev_input = seed
for i in range(num_frames_to_generate):
    prediction = predict_fn(prev_input)
    predictions.append(prediction)
    # Rescale the next input using the running min/max of everything
    # generated so far, keeping values in the network's expected range.
    pred_min = numpy.min(predictions)
    pred_max = numpy.max(predictions)
    prev_input = rescale(prediction, pred_max, pred_min)
actually_generated = numpy.asarray(predictions)[seq_length:, :, :, :]  # cut off the seed frames
last_frames = actually_generated[:, :, -1, :]
make_wav(output_filename, actually_generated.flatten())
print(str(secs_to_generate) + ' seconds of audio generated')
print(output_filename)
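rescale is not defined in the snippet. A minimal sketch of one plausible definition, assuming it linearly maps the prediction from its observed range back into [0, 1] before it is fed back to the network; the target range and signature are guesses, not the original implementation:

def rescale(x, data_max, data_min, new_max=1.0, new_min=0.0):
    # Linearly map values from [data_min, data_max] to [new_min, new_max].
    span = (data_max - data_min) or 1.0  # avoid division by zero
    return (x - data_min) / span * (new_max - new_min) + new_min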
Example #3
extensions = [dev_monitor, train_monitor, Timing(), Printing(after_batch=True),
              FinishAfter(after_n_epochs=num_epochs),
              saveload.Load(load_path), plotter,
              saveload.Checkpoint(last_path, save_separately=['log']),
              ] + track_best('dev_cost', save_path)

if learning_rate_decay not in (0, 1):
    # Multiply the learning rate by learning_rate_decay after every epoch.
    extensions.append(SharedVariableModifier(
        step_rules[0].learning_rate,
        lambda n, lr: numpy.cast[theano.config.floatX](learning_rate_decay * lr),
        after_epoch=True, after_batch=False))

print('number of parameters in the model: ' + str(tensor.sum([p.size for p in cg.parameters]).eval()))
# Finally build the main loop and train the model
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                     model=Model(cost), extensions=extensions)
main_loop.run()

# Generate
print "generating audio..."
seed = get_seed(hdf5_file, [400])
seed_influence_length = frame_length * 3  # in samples
sec = 16000  # sample rate in Hz
samples_to_generate = sec * secs_to_generate
# Generate enough extra frames to cover the seed-influenced samples cut off below.
num_frames_to_generate = samples_to_generate // frame_length + seed_influence_length // frame_length
generated_seq = []
prev_input = seed
for x in range(0, num_frames_to_generate):
    prediction = predict_fn(prev_input)
    generated_seq.append(prediction) #NEED TO OVERLAP/AVG?
    prev_input = prediction
actually_generated = numpy.asarray(generated_seq).flatten()[seed_influence_length:]  # drop the seed-influenced samples
filename = str(frame_length)+str(seq_length)+'.wav'
make_wav(filename, actually_generated)
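The NEED TO OVERLAP/AVG? note above flags that consecutive frame predictions are simply concatenated. One hypothetical way to smooth the frame boundaries, sketched under the assumption that each prediction can be advanced by a hop smaller than the frame length, is to average the overlapping samples:

import numpy

def overlap_average(frames, hop=None):
    # Overlap-and-average: place each frame `hop` samples after the previous
    # one and divide each output sample by how many frames covered it.
    frames = numpy.asarray(frames).reshape(len(frames), -1)
    frame_len = frames.shape[1]
    hop = hop or frame_len // 2
    out = numpy.zeros(hop * (len(frames) - 1) + frame_len)
    counts = numpy.zeros_like(out)
    for i, frame in enumerate(frames):
        out[i * hop:i * hop + frame_len] += frame
        counts[i * hop:i * hop + frame_len] += 1.0
    return out / numpy.maximum(counts, 1.0)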
Example #4
# To start fresh instead of resuming from a checkpoint, use saveload.Load(load_path);
# to resume from the checkpoint, use saveload.Load(last_path, load_log=True).
extensions = [dev_monitor, train_monitor, Timing(), Printing(after_batch=True),
              FinishAfter(after_n_epochs=num_epochs),
              saveload.Load(load_path), plotter,
              saveload.Checkpoint(last_path, save_separately=['log']),
              ] + track_best('dev_cost', save_path)

if learning_rate_decay not in (0, 1):
    # Multiply the learning rate by learning_rate_decay after every epoch.
    extensions.append(SharedVariableModifier(
        step_rules[0].learning_rate,
        lambda n, lr: numpy.cast[theano.config.floatX](learning_rate_decay * lr),
        after_epoch=True, after_batch=False))

print('number of parameters in the model: ' + str(tensor.sum([p.size for p in cg.parameters]).eval()))
# Finally build the main loop and train the model
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                     model=Model(cost), extensions=extensions)
main_loop.run()

# Generate
print "generating audio..."
seed = get_seed(hdf5_file, [400])
generated_seq = []
prev_input = seed
for x in range(0, len_to_generate):
    prediction = predict_fn(prev_input)
    generated_seq.append(prediction.flatten()) #NEED TO OVERLAP/AVG
    #generated_seq.append((prev_input[1:]+prediction[:-1])/2)
    #generated_seq.append(prediction[-1])
    prev_input = prediction
make_wav(output_filename, numpy.asarray(generated_seq).flatten())
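track_best('dev_cost', save_path), used by both training scripts above, is not shown. A sketch of the usual Blocks idiom it presumably wraps: a TrackTheBest extension paired with a Checkpoint that fires when the log records a new best; the original helper may differ in its details:

from blocks.extensions import saveload
from blocks.extensions.predicates import OnLogRecord
from blocks.extensions.training import TrackTheBest

def track_best(channel, save_path):
    # Track the best (lowest) value of `channel`; checkpoint to save_path
    # after any epoch in which the log notes a new best value.
    tracker = TrackTheBest(channel, choose_best=min)
    checkpoint = saveload.Checkpoint(save_path, after_training=False, use_cpickle=True)
    checkpoint.add_condition(['after_epoch'],
                             predicate=OnLogRecord(channel + '_best_so_far'))
    return [tracker, checkpoint]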