Example #1
0
from scipy.io import wavfile

# Settings forwarded to SPTK.mgcep further down; gamma is derived from stage.
order, alpha, stage = 34, 0.4, 2
gamma = -1.0 / stage

# Widen each row of `outputs` by appending its columns in reverse order with
# the first and last reversed column dropped (presumably to rebuild a
# symmetric full spectrum -- TODO confirm), then force a C-ordered float64
# array.
mgc_sp = outputs
mirrored_columns = mgc_sp[:, ::-1][:, 1:-1]
mgc_sp_test = numpy.hstack([mgc_sp, mirrored_columns]).astype('float64').copy(order='C')

# SPTK.mgcep is applied to every row (axis 1) separately.
mgcep_options = dict(eps=0.0012, etype=1, itype=2)
mgc_reconstruct = numpy.apply_along_axis(
    SPTK.mgcep, 1, mgc_sp_test, order, alpha, gamma, **mgcep_options)

# Pull one batch from the stream; only f0[2] is consumed in this fragment.
f0, sp = next(data_stream.get_epoch_iterator())

x_synth = mgcf02wav(mgc_reconstruct, f0[2])

# Rescale so the largest absolute sample lands at 95% of 2**15.
# NOTE(review): an all-zero x_synth would divide by zero here.
peak_amplitude = max(abs(x_synth))
x_synth = .95 * x_synth / peak_amplitude * 2**15

# Written as 16-bit integers at a 16000 Hz sample rate.
wav_path = save_dir + "samples/best_" + experiment_name + "9_scaled.wav"
wavfile.write(wav_path, 16000, x_synth.astype('int16'))




# f0, sp = next(data_stream.get_epoch_iterator())
# sp = sp[0]
# f0 = f0[1]

# mgc_sp = sp # For true data
# mgc_sp_test = numpy.hstack([mgc_sp,mgc_sp[:,::-1][:,1:-1]])
# mgc_sp_test = mgc_sp_test.astype('float64').copy(order = 'C')

# mgc_reconstruct = numpy.apply_along_axis(SPTK.mgcep, 1, mgc_sp_test, order, alpha, gamma, eps = 0.0012, etype = 1, itype = 2)
Example #2
0
	# mgc_reconstruct = numpy.apply_along_axis(SPTK.mgcep, 1, mgc_sp_test, order, alpha, gamma, eps = 0.0012, etype = 1, itype = 2)

	# x_synth = mgcf02wav(mgc_reconstruct, sampled_f0_corrected)
	# x_synth = .95 * x_synth/max(abs(x_synth)) * 2**15
	# wavfile.write(save_dir+"samples/best_"+experiment_name+num_sample+str(this_sample)+ ".wav", 16000, x_synth.astype('int16'))

	# Clamp `outputs` in place: first cap values above the upper limit, then
	# raise values below the lower limit.
	upper_limit = 11.866405
	lower_limit = -2.0992377
	outputs[outputs > upper_limit] = upper_limit
	outputs[outputs < lower_limit] = lower_limit

	# Two stacked panels sharing the x axis: the transposed `outputs` as an
	# image on top, the sampled f0 curve underneath.
	f, axarr = pyplot.subplots(2, sharex=True)
	image_ax, curve_ax = axarr[0], axarr[1]
	f.set_size_inches(100, 35)

	image_ax.imshow(outputs.T)
	#axarr[0].colorbar()
	image_ax.invert_yaxis()
	image_ax.set_ylim(0, 257)
	image_ax.set_xlim(0, 2048)

	curve_ax.plot(sampled_f0, linewidth=3)

	for panel in (image_ax, curve_ax):
		panel.set_adjustable('box-forced')

	# Save the figure under the "_scaled" name and release it.
	figure_path = save_dir + "samples/best_" + experiment_name + num_sample + str(this_sample) + "_scaled.png"
	pyplot.savefig(figure_path)
	pyplot.close()

	# Append to every row of `outputs` its reversed columns (minus the first
	# and last reversed column) and convert to a C-ordered float64 array.
	mgc_sp = outputs
	reversed_inner = mgc_sp[:, ::-1][:, 1:-1]
	mgc_sp_test = numpy.hstack([mgc_sp, reversed_inner]).astype('float64').copy(order='C')

	# Row-wise SPTK.mgcep (axis 1).
	mgcep_kwargs = dict(eps=0.0012, etype=1, itype=2)
	mgc_reconstruct = numpy.apply_along_axis(
		SPTK.mgcep, 1, mgc_sp_test, order, alpha, gamma, **mgcep_kwargs)

	# Synthesise with the corrected f0, then scale the peak to 95% of 2**15.
	x_synth = mgcf02wav(mgc_reconstruct, sampled_f0_corrected)
	loudest = max(abs(x_synth))
	x_synth = .95 * x_synth / loudest * 2**15

	# Written as 16-bit integers at a 16000 Hz sample rate.
	wav_path = save_dir + "samples/best_" + experiment_name + num_sample + str(this_sample) + "_scaled.wav"
	wavfile.write(wav_path, 16000, x_synth.astype('int16'))
Example #3
0
# Each row of `mgc_sp` gets its own columns appended in reverse order, with
# the first and last reversed column left out; the result is stored as a
# C-ordered float64 array.
column_mirror = mgc_sp[:, ::-1][:, 1:-1]
mgc_sp_test = numpy.hstack([mgc_sp, column_mirror]).astype('float64').copy(order='C')

# SPTK.mgcep runs once per row (axis 1) with the settings below.
mgcep_settings = dict(eps=0.0012, etype=1, itype=2)
mgc_reconstruct = numpy.apply_along_axis(
    SPTK.mgcep, 1, mgc_sp_test, order, alpha, gamma, **mgcep_settings)

# Next batch from the data stream; this fragment only reads f0[2] from it.
f0, sp = next(data_stream.get_epoch_iterator())

x_synth = mgcf02wav(mgc_reconstruct, f0[2])

# Normalise the largest absolute sample to 95% of 2**15 before the int16 cast.
peak_value = max(abs(x_synth))
x_synth = .95 * x_synth / peak_value * 2**15

output_wav = save_dir + "samples/best_" + experiment_name + "9_scaled.wav"
wavfile.write(output_wav, 16000, x_synth.astype('int16'))

# f0, sp = next(data_stream.get_epoch_iterator())
# sp = sp[0]
# f0 = f0[1]

# mgc_sp = sp # For true data
# mgc_sp_test = numpy.hstack([mgc_sp,mgc_sp[:,::-1][:,1:-1]])
# mgc_sp_test = mgc_sp_test.astype('float64').copy(order = 'C')

# mgc_reconstruct = numpy.apply_along_axis(SPTK.mgcep, 1, mgc_sp_test, order, alpha, gamma, eps = 0.0012, etype = 1, itype = 2)

# f0, sp = next(data_stream.get_epoch_iterator())
    # Save and close the figure that was being drawn for this sample.
    figure_path = save_dir + "samples/new/data" + str(this_sample) + ".png"
    pyplot.savefig(figure_path)
    pyplot.close()

    # Take this sample's entry of sp_tr, append its reversed columns (without
    # the first and last reversed column) and store it as C-ordered float64.
    mgc_sp = sp_tr[this_sample]
    flipped_inner = mgc_sp[:, ::-1][:, 1:-1]
    mgc_sp_test = numpy.hstack([mgc_sp, flipped_inner]).astype('float64').copy(order='C')

    # Row-wise SPTK.mgcep (axis 1).
    mgcep_kwargs = dict(eps=0.0012, etype=1, itype=2)
    mgc_reconstruct = numpy.apply_along_axis(
        SPTK.mgcep, 1, mgc_sp_test, order, alpha, gamma, **mgcep_kwargs)

    # Synthesise with this sample's f0 and scale the peak to 95% of 2**15.
    x_synth = mgcf02wav(mgc_reconstruct, f0_tr[this_sample])
    peak = max(abs(x_synth))
    x_synth = .95 * x_synth / peak * 2**15

    # Written as 16-bit integers at a 16000 Hz sample rate.
    wav_path = save_dir + "samples/new/data" + num_sample + str(this_sample) + ".wav"
    wavfile.write(wav_path, 16000, x_synth.astype('int16'))

# Load the object stored at this experiment's "best_" pickle path.
checkpoint_path = save_dir + "pkl/best_" + experiment_name + ".pkl"
main_loop = load(checkpoint_path)

# The unpacking requires the model to report exactly two top bricks.
top_bricks = main_loop.model.get_top_bricks()
lookup, generator = top_bricks

from theano import tensor, function

# Symbolic matrix variable named 'phonemes'.
phonemes = tensor.imatrix('phonemes')

sample = ComputationGraph(
    generator.generate(attended=lookup.apply(phonemes),
                       n_steps=phonemes.shape[0],
Example #5
0
	# Finish the current figure: f0 curve on the second axes, then save and
	# close it.
	axarr[1].plot(sampled_f0, linewidth=3)
	for panel in (axarr[0], axarr[1]):
		panel.set_adjustable('box-forced')
	figure_path = save_dir + "samples/best_" + experiment_name + num_sample + str(this_sample) + ".png"
	pyplot.savefig(figure_path)
	pyplot.close()

	# NOTE: this binds a second name to the same object, not a copy, so
	# zeroing the negative entries also changes `sampled_f0`.
	sampled_f0_corrected = sampled_f0
	negative_entries = sampled_f0_corrected < 0
	sampled_f0_corrected[negative_entries] = 0.

	# Append to every row of `outputs` its reversed columns (minus the first
	# and last reversed column) and convert to a C-ordered float64 array.
	mgc_sp = outputs
	reversed_inner = mgc_sp[:, ::-1][:, 1:-1]
	mgc_sp_test = numpy.hstack([mgc_sp, reversed_inner]).astype('float64').copy(order='C')

	# Row-wise SPTK.mgcep (axis 1).
	mgcep_kwargs = dict(eps=0.0012, etype=1, itype=2)
	mgc_reconstruct = numpy.apply_along_axis(
		SPTK.mgcep, 1, mgc_sp_test, order, alpha, gamma, **mgcep_kwargs)

	# Synthesise with the corrected f0, then scale the peak to 95% of 2**15.
	x_synth = mgcf02wav(mgc_reconstruct, sampled_f0_corrected)
	loudest = max(abs(x_synth))
	x_synth = .95 * x_synth / loudest * 2**15

	# Written as 16-bit integers at a 16000 Hz sample rate.
	wav_path = save_dir + "samples/best_" + experiment_name + num_sample + str(this_sample) + ".wav"
	wavfile.write(wav_path, 16000, x_synth.astype('int16'))

	# Scaling: clamp `outputs` in place. Values above 11.866405 are set to
	# that bound first, then values below -2.0992377 are set to that bound.
	outputs[outputs>11.866405] = 11.866405
	outputs[outputs<-2.0992377] = -2.0992377

	# New figure with two stacked axes sharing x: the transposed `outputs`
	# as an image on axarr[0] and the sampled f0 curve on axarr[1].
	f, axarr = pyplot.subplots(2, sharex=True)
	f.set_size_inches(100,35)
	axarr[0].imshow(outputs.T)
	#axarr[0].colorbar()
	axarr[0].invert_yaxis()
	axarr[0].set_ylim(0,257)
	axarr[0].set_xlim(0,2048)
	axarr[1].plot(sampled_f0,linewidth=3)
	# NOTE(review): from here on the axis setup for axarr[0] is repeated and
	# axarr[2] is indexed, although the subplots(2) call above creates only
	# two axes. This looks like the tail of a different, three-panel figure
	# (training data: f0_tr and phonemes_tr) with the lines in between
	# missing from this excerpt -- confirm against the full script before
	# changing anything here.
	axarr[0].invert_yaxis()
	axarr[0].set_ylim(0,257)
	axarr[0].set_xlim(0,2048)
	axarr[1].plot(f0_tr[this_sample],linewidth=3)
	axarr[2].plot(phonemes_tr[:,this_sample], linewidth=3)
	axarr[2].set_adjustable('box-forced')
	axarr[0].set_adjustable('box-forced')
	axarr[1].set_adjustable('box-forced')
	# Save the current figure for this sample and close it.
	pyplot.savefig(save_dir+"samples/new/data"+str(this_sample)+".png")
	pyplot.close()

	# Take this sample's entry of sp_tr, append its reversed columns (without
	# the first and last reversed column) and store it as C-ordered float64.
	mgc_sp = sp_tr[this_sample]
	flipped_inner = mgc_sp[:, ::-1][:, 1:-1]
	mgc_sp_test = numpy.hstack([mgc_sp, flipped_inner]).astype('float64').copy(order='C')

	# Row-wise SPTK.mgcep (axis 1).
	mgcep_kwargs = dict(eps=0.0012, etype=1, itype=2)
	mgc_reconstruct = numpy.apply_along_axis(
		SPTK.mgcep, 1, mgc_sp_test, order, alpha, gamma, **mgcep_kwargs)

	# Synthesise with this sample's f0 and scale the peak to 95% of 2**15.
	x_synth = mgcf02wav(mgc_reconstruct, f0_tr[this_sample])
	peak = max(abs(x_synth))
	x_synth = .95 * x_synth / peak * 2**15

	# Written as 16-bit integers at a 16000 Hz sample rate.
	wav_path = save_dir + "samples/new/data" + num_sample + str(this_sample) + ".wav"
	wavfile.write(wav_path, 16000, x_synth.astype('int16'))

# Load whatever is stored at this experiment's "best_" pickle path.
pkl_path = save_dir + "pkl/best_" + experiment_name + ".pkl"
main_loop = load(pkl_path)

# Exactly two top bricks are expected; the tuple unpacking fails otherwise.
bricks = main_loop.model.get_top_bricks()
lookup, generator = bricks

from theano import tensor, function

# Symbolic matrix variable named 'phonemes'.
phonemes = tensor.imatrix('phonemes')

sample = ComputationGraph(
	generator.generate(
		attended=lookup.apply(phonemes),
		n_steps=phonemes.shape[0],