# Module-level imports assumed by the functions below (project-specific names such
# as model, rtdnn, AdaDelta, Momentum, grid_plot and load_data_timit_seq are
# expected to come from the project's own modules).
import os
import sys
import time
import cPickle
import pickle
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
import matplotlib.pyplot as plt
from scipy.io import wavfile


def gen_wav(savename):
    start = 1
    stop = 101
    slice = 'N'
    wavtype = 'timit'
    postfix = 'g0.1'
    #mdl = cPickle.load(open('_'.join(savename.split('_')[:-1])+'.pkl'))#[0]
    mdl, z_tot, _ = cPickle.load(open(savename))#[1]
    image_w = mdl.layers[0].input_shape[-1]
    z_in = T.tensor4()
    decode_out = theano.function([z_in], mdl.decode(z_in))
    x_tot = load_data_timit_seq('test', start, stop, image_w, wavtype, slice).get_value()
    z_tot = z_tot.get_value()
    z_tot = z_tot + 0.01 * np.asarray(np.random.normal(size=z_tot.shape), dtype=theano.config.floatX)
    z_dec_tot = decode_out(z_tot)
    while True:
        #index = raw_input('input_index : ')
        index = 1
        if index == 'q':
            break
        else:
            index = int(index)
        x = x_tot[index].flatten()
        z = z_tot[index].flatten()
        z_dec = z_dec_tot[index].flatten()
        f, (ax1, ax2) = plt.subplots(2, 1)
        ax1.plot(np.asarray([z_dec, x]).T, linewidth=2.0)
        ax1.legend(['z_dec', 'x'])
        ax2.plot(np.asarray([z]).T)
        ax2.legend(['z'])
        for i in xrange(z_tot.shape[1]):
            ax2.axvline(x=i * z_tot.shape[3], color='k', linestyle='--')
        #plt.show()
        plt.savefig(os.path.splitext(savename)[0]+'_'+str(index)+postfix+'.png')
        plt.close()
        for ind, save_wav in enumerate([z_dec, x_tot[index]]):
            # _std / _mean are assumed to be module-level normalisation constants
            x_dec_sav = save_wav * _std + _mean
            x_dec_sav = np.asarray(x_dec_sav, dtype=np.int16)
            wavfile.write(os.path.splitext(savename)[0]+'_'+str(index)+'_'+str(ind)+postfix+'.wav',
                          16000, x_dec_sav)
        break
def gen_wav(savename):
    mdl, _ = cPickle.load(open(savename))#[1]
    image_w = mdl.input_shape[0][-1]  # image_w of the first layer
    batch_size = mdl.batch_size
    channel = 1
    start = 1
    stop = start + batch_size
    slice = 'N'
    wavtype = 'timit'
    x_in = T.matrix()
    #x_re = x_in.reshape((image_w, batch_size, channel))
    x_re = x_in.reshape((batch_size, channel, 1, image_w))
    recon = theano.function([x_in], mdl.recon(x_re))
    x_tot = load_data_timit_seq('test', start, stop, image_w, wavtype, slice, rand='Y').get_value()
    x_tot_rec = recon(x_tot)
    while True:
        index = raw_input('input_index : ')
        #index = 1
        if index == 'q':
            break
        else:
            index = int(index)
        x = x_tot[index].flatten()
        x_rec = x_tot_rec[index].flatten()
        plt.plot(np.asarray([x_rec, x]).T, linewidth=2.0)
        plt.legend(['x_rec', 'x'])
        plt.show()
        #plt.savefig(os.path.splitext(savename)[0]+'_'+str(index)+'.png')
        plt.close()
        # break
def evaluate_rclayer():
    dataset = 'timit'
    gridx = 10
    gridy = 10
    channel = 1
    n_hids = 300
    wavtype = 'timit'
    learning_rate = 0.01
    batch_size = 10
    start = 0
    stop = None
    valid_stop = None
    segment = 'Y'
    image_w = 256
    filter_w = 128
    stride = 64
    learning_rule = 'mom'
    threshold = np.float32(1.)
    mom = 0.96
    mom_an = 0
    st_an = 100
    dec_hid = 'std'
    postfix = ''
    savepath = '/data/lisa/exp/kimtaeho/speech_synthesis/rclayer/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    filename = time.strftime("%m%d") + '_recconv_pure_tdnn_' + wavtype + \
               '_' + str(stop) + \
               '_' + str(image_w) + \
               postfix
    savename = savepath + filename
    print savename
    if os.path.exists(savename + '.pkl'):
        ans = raw_input('Same exp. exists, continue? ([Y]/N) ')
        if ans.upper() == 'N':
            return

    nrng = np.random.RandomState(23455)
    trng = RandomStreams(nrng.randint(1073741824))

    if dataset == 'timit':
        from code.utils.load_data import load_data_timit_seq
        train_set_x = load_data_timit_seq('train', start, stop, image_w, wavtype, segment, normtype=dec_hid)
        valid_set_x = load_data_timit_seq('valid', start, valid_stop, image_w, wavtype, segment, normtype=dec_hid)
        test_set_x = load_data_timit_seq('test', start, valid_stop, image_w, wavtype, segment, normtype=dec_hid)
    else:
        raise ValueError('invalid dataset')

    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches0 = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches0 = test_set_x.get_value(borrow=True).shape[0]
    if batch_size > min(n_train_batches0, n_valid_batches0, n_test_batches0):
        print 'Maximum batch size is %d' % min(n_train_batches0, n_valid_batches0, n_test_batches0)
        batch_size = min(n_train_batches0, n_test_batches0, n_valid_batches0)
    n_train_batches = n_train_batches0 / batch_size
    n_valid_batches = n_valid_batches0 / batch_size
    n_test_batches = n_test_batches0 / batch_size

    index = T.lscalar()
    x = T.matrix('x')
    #theano.config.compute_test_value = 'warn'
    #x.tag.test_value = np.asarray(np.random.rand(batch_size, image_w), dtype='float32')

    print '... building the model'
    x_re = x.reshape((batch_size, channel, 1, image_w))
    # shape is for [(enc_proj), (dec_proj)]
    input_shape = [(batch_size, channel, 1, image_w), (batch_size, n_hids, 1, image_w / stride)]
    filter_shape = [(n_hids, channel, 1, filter_w), (channel, n_hids, 1, filter_w)]
    mdl = model(
        rng=nrng,
        n_hids=n_hids,
        filter_shape=filter_shape,
        input_shape=input_shape,
        channel=channel,
        stride=stride,)
    cost = mdl.cost(x_re)
    params = mdl.params
    grads = T.grad(cost, params)

    # clip gradients by their global L2 norm
    norm2 = 0.
    for g in grads:
        norm2 += (g ** 2.).sum()
    grads = [T.switch(T.sqrt(norm2) > threshold, threshold * g / T.sqrt(norm2), g)
             for g in grads]
    gradsdic = dict(zip(params, grads))

    if learning_rule == 'con':
        updates = []
        for (param_i, grad_i,) in zip(params, grads):
            updates.append((param_i, param_i - learning_rate * grad_i))
    elif learning_rule == 'ada':
        ad = AdaDelta()
        updates = ad.get_updates(learning_rate, gradsdic)
    elif learning_rule == 'mom':
        mm = Momentum(mom)
        updates = mm.get_updates(learning_rate, gradsdic)
    else:
        raise ValueError('invalid learning_rule')

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={x: train_set_x[(index * batch_size):((index + 1) * batch_size)]})
    validate_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={x: valid_set_x[(index * batch_size):((index + 1) * batch_size)]})
    test_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={x: test_set_x[(index * batch_size):((index + 1) * batch_size)]})

    print '... training'
    first_lr = learning_rate
    best_validation_loss = np.inf
    start_time = time.clock()
    last_epoch_start_time = time.clock()
    train_score = []
    valid_score = []
    epoch = 0
    training_check_freq = np.inf
    valid_check_freq = np.inf  # np.ceil(n_train_batches/10)
    improvement_threshold = 0.9
    done_looping = False
    n_epochs = 1000

    while (epoch < n_epochs) and (not done_looping):
        sum_cost = 0.
        epoch_start_time = time.clock()
        start_time = epoch_start_time
        valid_time = epoch_start_time
        epoch = epoch + 1
        if epoch > st_an and learning_rule in ('con', 'mom'):
            learning_rate = first_lr / (epoch - st_an)
            if learning_rule == 'mom':
                mm.learning_rate = first_lr / (epoch - st_an)
                print mm.learning_rate
                if epoch == 0:
                    mm.momentum = 0
                elif epoch < mom_an:
                    mm.momentum = mom / (mom_an - epoch)
                mm.momentum = mom

        for minibatch_index in xrange(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            cost_ij = train_model(minibatch_index)
            sum_cost += cost_ij
            if (iter + 1) % training_check_freq == 0:
                print 'training @ iter = %i, time = %3.2fs, train cost/update = %f, %s' % \
                      (iter + 1, time.clock() - start_time, cost_ij, filename)
                start_time = time.clock()
            if (iter + 1) % valid_check_freq == 0:
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print 'validation @ iter = %i, time = %3.2fs, valid cost %f, %s ' % (
                      iter + 1, time.clock() - valid_time, this_validation_loss, filename)
                valid_time = time.clock()
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    #with open(savename+'.pkl', 'wb') as f:
                    #    pickle.dump(mdl, f)

        validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
        this_validation_loss = np.mean(validation_losses)
        valid_score.append(this_validation_loss)
        train_score.append(sum_cost / n_train_batches)
        monitor = [train_score, valid_score]

        # plot the two monitored curves, one line per series
        plt.plot(np.asarray([valid_score, train_score]).T)
        plt.legend(['valid', 'train'])
        plt.savefig(savename + '.png')
        plt.close()

        print ' %3i epoch, train error %f, valid error %f, takes %3.2fs, %s' % (
              epoch, sum_cost / n_train_batches, this_validation_loss,
              time.clock() - epoch_start_time, filename)

        # if we got the best validation score until now
        if this_validation_loss < best_validation_loss:
            best_validation_loss = this_validation_loss
            best_iter = iter
            with open(savename + '.pkl', 'wb') as f:
                pickle.dump([mdl, monitor], f)

    end_time = time.clock()
    print 'Optimization complete. total time is %3.2f' % (end_time - start_time)
    print savename
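# The training scripts above rescale all gradients by their global L2 norm before
# building the updates. The helper below is a minimal NumPy sketch of that same
# rule on plain arrays; it is illustrative only and is not called by the code above.
def _clip_by_global_norm(grad_arrays, threshold=1.0):
    # If the global L2 norm exceeds the threshold, scale every gradient by
    # threshold / ||g||_2, mirroring the T.switch expression used above.
    total_norm = np.sqrt(sum((g ** 2).sum() for g in grad_arrays))
    if total_norm > threshold:
        return [g * (threshold / total_norm) for g in grad_arrays]
    return grad_arrays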
def gen_wav(savename=None):
    mdl = cPickle.load(open(savename))
    if type(mdl) == list:
        mdl, cost = mdl
    dataset = 'timit'
    batch_size = 100
    start = 100
    stop = start + batch_size
    image_h = 1
    image_w = 320
    using_noise = False
    savefreq = 10
    eps_decay = np.inf
    #eps_decay = 100.
    eps0 = 1.
    wavtype = 'noise'
    which_set = 'test'
    index = int(raw_input('Input index [0,%s): ' % (batch_size)))
    noise_lvl = None
    # How about annealing the noise?
    mdl.set_image_size(image_w)
    # 'filename' is a helper assumed to be defined elsewhere in the module
    fname0 = filename(wavtype, which_set, index, image_w, eps_decay, using_noise)
    from code.utils.load_data import load_data_timit_seq
    set_x = load_data_timit_seq(which_set, start, stop, lend=image_w, wavtype=wavtype)
    #image_w = set_x.shape[1]

    x = T.matrix('x')
    epsilon = T.scalar('e')
    x_reshape = x.reshape((1, 1, image_h, image_w))
    if using_noise == True:
        x_reshape = mdl.corruptor(x_reshape, noise_lvl)
    y = epsilon * mdl.reconstruction(x_reshape) + (1 - epsilon) * x_reshape  # For climbing
    predict = theano.function([x, epsilon], [x_reshape, y])  # For climbing

    x_in = set_x.get_value()[index, :]
    x_rec = x_in
    cntiter = 0
    eps = eps0

    # Iteration for denoising
    while True:
        niter = raw_input('Input niter (cumulative, 0 for initialization): ')
        if niter == '0':
            index = int(raw_input('Input index [0,%s), prev was %i: ' % (batch_size, index)))
            # Initialize values
            fname0 = filename(wavtype, which_set, index, image_w, eps_decay, using_noise)
            eps = eps0
            cntiter = 0
            x_in = set_x.get_value()[index, :]
            x_rec = x_in
            continue
        elif niter == 'decay':
            if eps_decay == np.inf:
                eps_decay = 100.
            elif eps_decay == 100.:
                eps_decay = np.inf
            fname0 = filename(wavtype, which_set, index, image_w, eps_decay, using_noise)
            print 'decay is changed to %s' % str(eps_decay)
            continue
        elif not niter.isdigit():
            print 'input is not an integer. Try again'
            continue
        niter = int(niter)
        for i in range(niter):
            cntiter += 1
            if cntiter >= 100:
                eps = eps0 - cntiter / eps_decay
                if eps < 0.:
                    eps = 0.
            x_rec = np.asmatrix(x_rec, dtype=theano.config.floatX)
            x_cor, x_rec = predict(x_rec, eps)
            if cntiter % savefreq == 0:
                f, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True, sharey=True)
                x_in = np.array(x_in).flatten()
                ax1.plot(x_in)
                x_cor = np.array(x_cor).flatten()
                ax2.plot(x_cor)
                x_rec = np.array(x_rec).flatten()
                ax3.plot(x_rec)
                ax1.set_title('initial test input')
                ax2.set_title('%dth corrupted' % cntiter)
                ax3.set_title('%dth reconstructed' % cntiter)
                ylim = max(x_cor)
                ax1.axis([0, x_in.shape[0], -ylim, ylim])
                ax2.axis([0, x_in.shape[0], -ylim, ylim])
                ax3.axis([0, x_in.shape[0], -ylim, ylim])
                plt.show()
                fname = fname0 + '_iter' + str(cntiter)
                fpath = os.path.dirname(savename) + '/wavpng/' + \
                        os.path.splitext(os.path.basename(savename))[0]
                if not os.path.exists(fpath):
                    os.makedirs(fpath)
                print os.path.join(fpath, fname)
                plt.savefig(os.path.join(fpath, fname) + '.png')
                plt.close()
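# gen_wav above repeatedly applies y = eps * recon(x) + (1 - eps) * x while
# annealing eps after 100 iterations. The sketch below shows that damped
# iteration on plain arrays; `recon_fn` is a placeholder for the model's
# reconstruction and is not part of the original code.
def _damped_reconstruction(x0, recon_fn, niter=100, eps0=1.0, eps_decay=np.inf):
    # Each step mixes the reconstruction with the current signal; eps decays
    # linearly once the iteration count reaches 100, mirroring gen_wav above.
    x = np.asarray(x0, dtype='float32')
    for it in range(1, niter + 1):
        eps = eps0 if it < 100 else max(eps0 - it / float(eps_decay), 0.0)
        x = eps * recon_fn(x) + (1.0 - eps) * x
    return x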
def evaluate_rclayer():
    dataset = 'timit'
    gridx = 10
    gridy = 10
    channel = 1
    n_hids = 100
    wavtype = 'timit'
    learning_rate = 0.001
    batch_size = 256
    start = 0
    stop = 50
    segment = 'Y'
    image_w = 256
    learning_rule = 'ada'
    threshold = np.float32(1.)
    mom = 0.96
    mom_an = 0
    st_an = 100
    dec_hid = 'std'
    postfix = '_trial'
    savepath = '/data/lisa/exp/kimtaeho/speech_synthesis/rclayer/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    filename = 'recconv_' + wavtype + \
               '_' + str(stop) + \
               '_' + str(image_w) + \
               postfix
    savename = savepath + filename
    print savename
    if os.path.exists(savename + '.pkl'):
        ans = raw_input('Same exp. exists, continue? ([Y]/N) ')
        if ans.upper() == 'N':
            return

    nrng = np.random.RandomState(23455)
    trng = RandomStreams(nrng.randint(1073741824))

    if dataset == 'timit':
        from code.utils.load_data import load_data_timit_seq
        train_set_x = load_data_timit_seq('train', start, stop, image_w, wavtype, segment, normtype=dec_hid)
        if not stop == None:
            stop_ = int(np.ceil(stop / 3))
        else:
            stop_ = None  # assumed fallback so the valid/test loaders still work when stop is None
        valid_set_x = load_data_timit_seq('valid', start, stop_, image_w, wavtype, segment, normtype=dec_hid)
        test_set_x = load_data_timit_seq('test', start, stop_, image_w, wavtype, segment, normtype=dec_hid)
    else:
        raise ValueError('invalid dataset')

    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches0 = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches0 = test_set_x.get_value(borrow=True).shape[0]
    if batch_size > min(n_train_batches0, n_valid_batches0, n_test_batches0):
        print 'Maximum batch size is %d' % min(n_train_batches0, n_valid_batches0, n_test_batches0)
        batch_size = min(n_train_batches0, n_test_batches0, n_valid_batches0)
    n_train_batches = n_train_batches0 / batch_size
    n_valid_batches = n_valid_batches0 / batch_size
    n_test_batches = n_test_batches0 / batch_size

    index = T.lscalar()
    x = T.matrix('x')

    print '... building the model'
    x_re = x.reshape((image_w, batch_size, channel))
    mdl = model(rng=nrng, n_hids=n_hids)
    cost = mdl.cost(x_re)
    params = mdl.params
    grads = T.grad(cost, params)

    # clip gradients by their global L2 norm
    norm2 = 0.
    for g in grads:
        norm2 += (g ** 2.).sum()
    grads = [T.switch(T.sqrt(norm2) > threshold, threshold * g / T.sqrt(norm2), g)
             for g in grads]
    gradsdic = dict(zip(params, grads))

    if learning_rule == 'con':
        updates = []
        for (param_i, grad_i,) in zip(params, grads):
            updates.append((param_i, param_i - learning_rate * grad_i))
    elif learning_rule == 'ada':
        ad = AdaDelta()
        updates = ad.get_updates(learning_rate, gradsdic)
    elif learning_rule == 'mom':
        mm = Momentum(mom)
        updates = mm.get_updates(learning_rate, gradsdic)
    else:
        raise ValueError('invalid learning_rule')

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={x: train_set_x[(index * batch_size):((index + 1) * batch_size)].T})
    validate_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={x: valid_set_x[(index * batch_size):((index + 1) * batch_size)].T})
    test_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={x: test_set_x[(index * batch_size):((index + 1) * batch_size)].T})

    print '... training'
    first_lr = learning_rate
    best_validation_loss = np.inf
    start_time = time.clock()
    last_epoch_start_time = time.clock()
    score = []
    epoch = 0
    patience = 100000
    patience_increase = 1.001
    training_check_freq = 10
    validation_frequency = min(n_train_batches, patience / 2)
    improvement_threshold = 0.9
    done_looping = False
    n_epochs = 100

    while (epoch < n_epochs) and (not done_looping):
        epoch_start_time = time.clock()
        epoch = epoch + 1
        if epoch > st_an and learning_rule in ('con', 'mom'):
            learning_rate = first_lr / (epoch - st_an)
            if learning_rule == 'mom':
                mm.learning_rate = first_lr / (epoch - st_an)
                print mm.learning_rate
                if epoch == 0:
                    mm.momentum = 0
                elif epoch < mom_an:
                    mm.momentum = mom / (mom_an - epoch)
                mm.momentum = mom

        for minibatch_index in xrange(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            cost_ij = train_model(minibatch_index)
            if iter % training_check_freq == 0:
                print 'training @ iter = %5i, time = %3.2fs, training cost = %f, %s' % \
                      (iter, time.clock() - start_time, cost_ij, filename)
                sys.stdout.flush()
                start_time = time.clock()
            if (iter + 1) % validation_frequency == 0:
                valid_time = time.clock()
                # compute the reconstruction cost on the validation set
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print ' %3i, valid error %f, %.2fs, %s ' % (
                      epoch, this_validation_loss, time.clock() - valid_time, filename)
                score.append(this_validation_loss)
                plt.plot(xrange(len(score)), score)
                plt.savefig(savename + '.png')
                plt.close()
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    with open(savename + '.pkl', 'wb') as f:
                        pickle.dump([mdl, score], f)
                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print ' test error %f' % test_score
            if patience <= iter:
                done_looping = True
                break
        print ' %3i epoch, takes %3.2fs' % (epoch, time.clock() - epoch_start_time)

    '''
    while True:
        epoch_start_time = time.clock()
        epoch = epoch + 1
        if epoch > st_an and learning_rule in ('con', 'mom'):
            learning_rate = first_lr / (epoch - st_an)
            if learning_rule == 'mom':
                mm.learning_rate = first_lr / (epoch - st_an)
                print mm.learning_rate
                if epoch == 0:
                    mm.momentum = 0
                elif epoch < mom_an:
                    mm.momentum = mom / (mom_an - e)
                mm.momentum = mom
        cost_ij = 0
        for minibatch_index in xrange(n_train_batches):
            cost_it = train_model(minibatch_index)
            cost_ij += cost_it
        cost_ij /= 2 * n_train_batches
        print '%3i, test error %f, %.2fs, %s ' % (
              epoch, cost_ij, time.clock() - epoch_start_time, filename)
        last_epoch_start_time = time.clock()
        score.append(cost_ij)
        if epoch % 10 == 0:
            if cost_ij < best_validation_loss:
                best_validation_loss = cost_ij
                with open(savename + '.pkl', 'wb') as f:
                    pickle.dump([mdl, score], f)
    '''
    end_time = time.clock()
    print 'Optimization complete. total time is %3.2f' % (end_time - start_time)
    print savename
def gen_wav_rtdnn(savename):
    osrtdnn = cPickle.load(open(savename))
    learning_rate = 0.1
    n_epochs = 10000000
    dataset = 'timit'
    batch_size, _, _, image_w = osrtdnn.input_shape[0]
    gridx = 10
    gridy = 10
    start = 1
    stop = start + batch_size
    channel = 1
    wavtype = 'timit'
    learning_rule = 'mom'
    slice = 'N'
    mom = 0.96
    postfix = '_z'
    savename = os.path.splitext(savename)[0] + postfix

    train_set_x = load_data_timit_seq('test', start, stop, image_w, wavtype, slice)

    # compute number of minibatches for training, validation and testing
    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches = n_train_batches0 / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    osrtdnn_input = x.reshape(osrtdnn.input_shape[0])
    cost = osrtdnn.cost(osrtdnn_input)
    zparams = osrtdnn.zparams
    zgrads = T.grad(cost, zparams)
    zgradsdic = dict(zip(zparams, zgrads))

    if learning_rule == 'ada':
        ad = AdaDelta()
        zupdates = ad.get_updates(learning_rate, zgradsdic)
    elif learning_rule == 'con':
        zupdates = []
        for param_i, grad_i in zip(zparams, zgrads):
            zupdates.append((param_i, param_i - learning_rate * grad_i))
    elif learning_rule == 'mom':
        momentum = mom
        mm = Momentum(momentum)
        zupdates = mm.get_updates(learning_rate, zgradsdic)
    else:
        raise ValueError('invalid learning_rule')

    train_z_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=zupdates,
        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000               # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    first_lr = learning_rate
    st_an = 200
    en_an = 2000
    best_params = None
    best_validation_loss = np.inf
    test_score = 0.

    start_time = time.clock()
    epoch_start_time = 0
    score_cum = []
    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        last_epoch_start_time = epoch_start_time
        epoch_start_time = time.clock()
        epoch = epoch + 1
        if epoch > st_an and learning_rule in ['con', 'mom']:
            learning_rate = first_lr / (epoch - st_an)
        cost_ij = 0
        for minibatch_index in xrange(n_train_batches):
            cost_ij += train_z_model(minibatch_index)
        cost_ij /= (2 * (n_train_batches))
        # the averaged training cost is used as the validation signal here
        this_validation_loss = cost_ij
        print('%3i, training error %f, %.2fs/%.2fs, %s ' % \
              (epoch, this_validation_loss,
               (time.clock() - epoch_start_time),
               (epoch_start_time - last_epoch_start_time), savename))
        score_cum.append(this_validation_loss)
        # if we got the best validation score until now
        if epoch % 100 == 0 and this_validation_loss < best_validation_loss:
            # plot score
            plt.plot(xrange(len(score_cum)), score_cum)
            plt.savefig(savename + '.png')
            plt.close()
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            osrtdnn.set_cost(best_validation_loss)
            with open(savename + '.pkl', 'wb') as f:
                cPickle.dump(osrtdnn, f)

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f, with test performance %f' %
          (best_validation_loss, test_score))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    print savename
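# Several loops above switch to a 1/t learning-rate decay once the epoch count
# passes st_an. The helper below spells out that schedule; it is illustrative
# only and is not referenced by the training code above.
def _annealed_lr(first_lr, epoch, st_an):
    # Constant learning rate until epoch st_an, then first_lr / (epoch - st_an).
    if epoch <= st_an:
        return first_lr
    return first_lr / float(epoch - st_an)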
from evaluate import model
from rconv_layers import RecursiveConvolutionalLayer
from code.utils.load_data import load_data_timit_seq

assert len(sys.argv) == 2, "Input argument!"

start = 0
stop = None
image_w = 256
channel = 1
wavtype = "timit"
segment = "Y"
dec_hid = "std"
batch_size = 16

valid_set_x = load_data_timit_seq("valid", start, stop, image_w, wavtype, segment, normtype=dec_hid)
n_valid_batches0 = valid_set_x.get_value(borrow=True).shape[0]
n_valid_batches = n_valid_batches0 / batch_size

index = T.lscalar()
x = T.matrix("x")
x_re = x.reshape((image_w, batch_size, channel))

mdl = cPickle.load(open(sys.argv[1]))[0]
cost = mdl.cost(x_re)
validate_model = theano.function(
    inputs=[index],
    outputs=cost,
    givens={x: valid_set_x[(index * batch_size):((index + 1) * batch_size)].T}
)
validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
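# Presumably this script is meant to report the averaged validation cost of the
# pickled model; printing the mean below is an assumption, not part of the
# original snippet.
print 'mean validation cost = %f' % np.mean(validation_losses)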
def evaluate_rtdnn():
    learning_rate = 0.1
    n_epochs = 10000
    dataset = 'timit'
    batch_size = 10
    start = 0
    stop = start + batch_size
    channel = 1
    image_h = 1
    image_w = 256
    filter_h = 1
    filter_w = 4
    nkerns = [4]
    wavtype = 'timit'
    learning_rule = 'mom'
    mom = 0.96
    dechid = 'tanh'
    postfix = ''
    savepath = '/data/lisa/exp/kimtaeho/speech_synthesis/rtdnn/result/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    filename = 'osrtdnn' + \
               '_toy' + wavtype + \
               '_w' + str(filter_w) + \
               '_' + learning_rule + \
               '_' + dechid + \
               postfix
    savename = savepath + filename
    if os.path.exists(savename + '.pkl'):
        ans = raw_input('Same exp. exists, continue? ([Y]/N) ')
        if ans.upper() == 'N':
            return

    nrng = np.random.RandomState(23455)
    trng = RandomStreams(nrng.randint(2 ** 30))

    if dataset == 'mnist.pkl.gz':
        from code.utils.load_data import load_data_mnist
        datasets = load_data_mnist(dataset)
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
    elif dataset == 'timit':
        from code.utils.load_data import load_data_timit_seq
        train_set_x = load_data_timit_seq('train', start, stop, image_w, wavtype)
        valid_set_x = load_data_timit_seq('valid', start, stop, image_w, wavtype)
        test_set_x = load_data_timit_seq('test', start, stop, image_w, wavtype)

    # compute number of minibatches for training, validation and testing
    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches0 = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches0 = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches = n_train_batches0 / batch_size
    n_valid_batches = n_valid_batches0 / batch_size
    n_test_batches = n_test_batches0 / batch_size
    assert min(n_train_batches, n_valid_batches, n_test_batches) > 0, \
        'Maximum batch size is %d' % min(n_train_batches0, n_valid_batches0, n_test_batches0)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    osrtdnn_input = x.reshape((channel, image_h, image_w, batch_size))
    input_shape = []
    filter_shape = []
    for i in xrange(len(nkerns)):
        if i == 0:
            input_shape.append((channel, image_h, image_w / (2 ** len(nkerns)), batch_size))
            filter_shape.append((channel, filter_h, filter_w, nkerns[0]))
        else:
            input_shape.append((nkerns[i - 1], image_h, image_w / (2 ** (len(nkerns) - i)), batch_size))
            filter_shape.append((nkerns[i - 1], filter_h, filter_w, nkerns[i]))
    osrtdnn = rtdnn(
        nrng=nrng,
        trng=trng,
        input_shape=input_shape,
        filter_shape=filter_shape,
        dec_hid=dechid,
    )
    cost = osrtdnn.cost(osrtdnn_input)
    params = osrtdnn.params
    grads = T.grad(cost, params)
    gradsdic = dict(zip(params, grads))

    if learning_rule == 'ada':
        ad = AdaDelta()
        updates = ad.get_updates(learning_rate, gradsdic)
    elif learning_rule == 'con':
        updates = []
        for param_i, grad_i in zip(params, grads):
            updates.append((param_i, param_i - learning_rate * grad_i))
    elif learning_rule == 'mom':
        momentum = mom
        mm = Momentum(momentum)
        updates = mm.get_updates(learning_rate, gradsdic)
    else:
        raise ValueError('invalid learning_rule')

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]})
    validate_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={x: valid_set_x[index * batch_size: (index + 1) * batch_size]})
    test_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={x: test_set_x[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000               # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    first_lr = learning_rate
    st_an = 800
    en_an = 2000
    best_params = None
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()
    score_cum = []
    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        if epoch > st_an and learning_rule in ['con', 'mom']:
            learning_rate = first_lr / (epoch - st_an)
        #if epoch >= st_an and epoch < en_an:
        #    learning_rate -= first_lr/(en_an-st_an)
        #elif epoch >= en_an:
        #    learning_rate = 0.
        for minibatch_index in xrange(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 1000 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)
            if (iter + 1) % validation_frequency == 0:
                # compute the reconstruction cost on the validation set
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print(' %3i, validation error %f, %s ' % \
                      (epoch, this_validation_loss, filename))
                score_cum.append(this_validation_loss)
                plt.plot(xrange(len(score_cum)), score_cum)
                plt.savefig(savename + '.png')
                plt.close()
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    osrtdnn.set_cost(best_validation_loss)
                    with open(savename + '.pkl', 'wb') as f:
                        pickle.dump(osrtdnn, f)
                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print(' test error %f' % test_score)
            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f obtained at iteration %i, with test performance %f' %
          (best_validation_loss, best_iter + 1, test_score))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    print savename
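# The Momentum learning rule used above is assumed to implement the classical
# update v <- mom * v - lr * g, p <- p + v. The sketch below shows one such step
# on plain arrays; the project's Momentum class may differ in detail, so this is
# illustrative only.
def _momentum_step(param, grad, velocity, lr, mom_coeff=0.96):
    # One classical momentum update; returns the new parameter and velocity.
    velocity = mom_coeff * velocity - lr * grad
    return param + velocity, velocity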
def evaluate_rtdnn():
    # Not so important
    n_epochs = 500
    dataset = 'timit'
    channel = 1
    image_h = 1
    filter_h = 1
    # Data type: 'timit', 'sin', 'rect', ...
    wavtype = 'timit'
    # Control the data size and batch size
    start = 0
    stop = None
    valid_stop = None
    segment = 'Y'
    batch_size = 256
    # Learning rule and rate
    learning_rule = 'ada'
    learning_rate = 0.1
    mom = 0.96
    mom_an = 0
    st_an = 100
    # Control the layers' characteristics
    image_w = 256
    dechid = ['lin', 'tanh', 'tanh']
    nkerns = [40, 40, 4]
    stride = [2, 2, 2]
    filter_w = 25
    postfix = ''
    if not len(nkerns) == len(dechid) or not len(nkerns) == len(stride):
        raise ValueError('nkerns, dechid, stride should have the same length')

    savepath = '/data/lisa/exp/kimtaeho/speech_synthesis/rtdnn/result/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    filename = time.strftime("%m%d") + '_tdnn_' + wavtype + \
               '_' + str(batch_size) + \
               '_' + str(stop) + \
               '_' + str(image_w) + \
               '_' + str(filter_w) + \
               '_' + str(nkerns[-1]) + ':' + str(np.prod(stride)) + \
               '_' + learning_rule + \
               '_' + str(learning_rate) + \
               '_' + dechid[-1] \
               + postfix
    savename = savepath + filename
    print savename
    if os.path.exists(savename + '.pkl'):
        ans = raw_input('Same exp. exists, continue? ([Y]/N) ')
        if ans.upper() == 'N':
            return

    nrng = np.random.RandomState(23455)

    if dataset == 'mnist.pkl.gz':
        from code.utils.load_data import load_data_mnist
        datasets = load_data_mnist(dataset)
        (train_set_x, train_set_y,) = datasets[0]
        (valid_set_x, valid_set_y,) = datasets[1]
        (test_set_x, test_set_y,) = datasets[2]
    elif dataset == 'timit':
        from code.utils.load_data import load_data_timit_seq
        train_set_x = load_data_timit_seq('train', start, stop, image_w, wavtype, segment)
        valid_set_x = load_data_timit_seq('valid', start, valid_stop, image_w, wavtype, segment)

    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches0 = valid_set_x.get_value(borrow=True).shape[0]
    if batch_size > min(n_train_batches0, n_valid_batches0):
        print 'Maximum batch size is %d' % n_train_batches0
        batch_size = min(n_train_batches0, n_valid_batches0)
    n_train_batches = n_train_batches0 / batch_size
    n_valid_batches = n_valid_batches0 / batch_size

    index = T.lscalar()
    x = T.matrix('x')

    print '... building the model'
    x_re = x.reshape((batch_size, channel, image_h, image_w))
    input_shape = []
    filter_shape = []
    for i in xrange(len(nkerns)):
        if i == 0:
            filter_shape.append((nkerns[0], channel, filter_h, filter_w))
            input_shape.append((batch_size, channel, image_h, image_w))
        else:
            filter_shape.append((nkerns[i], nkerns[(i - 1)], filter_h, filter_w))
            input_shape.append((batch_size, nkerns[(i - 1)], image_h, image_w / 2 ** i))  # stride should be changed
    osrtdnn = rtdnn(nrng=nrng,
                    input_shape=input_shape,
                    filter_shape=filter_shape,
                    stride=stride,
                    dec_hid=dechid)

    # Initialization of the hidden representation
    osrtdnn.set_batch_size(n_train_batches0)
    x_tot_shape = x.reshape((n_train_batches0, channel, image_h, image_w))
    z_val = osrtdnn.encode(x_tot_shape)
    z_init = theano.function([x], z_val)
    z_tot = theano.shared(value=z_init(train_set_x.get_value()), borrow=True)
    osrtdnn.set_batch_size()

    (cost, cost_dec, cost_rec,) = osrtdnn.cost(x_re, z_tot[(index * batch_size):((index + 1) * batch_size)])
    fparams = osrtdnn.fparams
    fgrads = T.grad(cost, fparams)
    fgradsdic = dict(zip(fparams, fgrads))
    zgrads = T.grad(cost, z_tot)
    zgradsdic = {z_tot: zgrads}

    if learning_rule == 'ada':
        ad = AdaDelta()
        fupdates = ad.get_updates(learning_rate, fgradsdic)
        zupdates = ad.get_updates(learning_rate, zgradsdic)
    elif learning_rule == 'con':
        fupdates = []
        for (param_i, grad_i,) in zip(fparams, fgrads):
            fupdates.append((param_i, param_i - learning_rate * grad_i))
        zupdates = {z_tot: z_tot - zgrads}
    elif learning_rule == 'mom':
        momentum = mom
        mm = Momentum(momentum)
        fupdates = mm.get_updates(learning_rate, fgradsdic)
        zupdates = mm.get_updates(learning_rate, zgradsdic)
    else:
        raise ValueError('invalid learning_rule')

    train_z_model = theano.function(
        inputs=[index],
        outputs=[cost, cost_dec, cost_rec],
        updates=zupdates,
        givens={x: train_set_x[(index * batch_size):((index + 1) * batch_size)]})
    train_f_model = theano.function(
        inputs=[index],
        outputs=[cost, cost_dec, cost_rec],
        updates=fupdates,
        givens={x: train_set_x[(index * batch_size):((index + 1) * batch_size)]})
    valid_model = theano.function(
        inputs=[index],
        outputs=[cost, cost_dec, cost_rec],
        # validate on valid_set_x (assumed; the original sliced train_set_x here)
        givens={x: valid_set_x[(index * batch_size):((index + 1) * batch_size)]})

    print '... training'
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    train_check_freq = np.inf
    valid_check_freq = np.inf  # min(n_train_batches, patience / 2)
    first_lr = learning_rate
    en_an = 2000
    best_params = None
    best_validation_loss = np.inf
    test_score = 0.0
    start_time = time.clock()
    valid_time = time.clock()
    score = []
    score_dec = []
    score_rec = []
    monitor = []
    epoch = 0
    done_looping = False

    while epoch < n_epochs and not done_looping:
        epoch_start_time = time.clock()
        epoch = epoch + 1
        sum_cost = 0
        sum_cost_dec = 0
        sum_cost_rec = 0
        if epoch > st_an and learning_rule in ('con', 'mom'):
            learning_rate = first_lr / (epoch - st_an)
            if learning_rule == 'mom':
                mm.learning_rate = first_lr / (epoch - st_an)
                print mm.learning_rate
                if epoch == 0:
                    mm.momentum = 0
                elif epoch < mom_an:
                    mm.momentum = mom / (mom_an - epoch)
                mm.momentum = mom

        for minibatch_index in xrange(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            (cost_z, cost_dec_z, cost_rec_z) = train_z_model(minibatch_index)
            (cost_f, cost_dec_f, cost_rec_f) = train_f_model(minibatch_index)
            sum_cost += cost_f + cost_z
            sum_cost_dec += cost_dec_f + cost_dec_z
            sum_cost_rec += cost_rec_f + cost_rec_z  # accumulate the rec term (assumed; the original repeated the dec term)
            if (iter + 1) % train_check_freq == 0:
                print 'training @ iter = %i, time = %3.2fs, training cost = %f, %f, %s' % \
                      (iter + 1, time.clock() - start_time, cost_f, cost_z, filename)
                start_time = time.clock()
            if (iter + 1) % valid_check_freq == 0:
                validation_losses = [valid_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print 'validation @ iter = %i, time = %3.2fs, valid cost %f, %s ' % (
                      iter + 1, time.clock() - valid_time, this_validation_loss, filename)
                valid_time = time.clock()
                # if we got the best validation score until now
                #if this_validation_loss < best_validation_loss:
                #    best_validation_loss = this_validation_loss
                #    best_iter = iter
                #    with open(savename+'.pkl', 'wb') as f:
                #        pickle.dump([osrtdnn, z_tot, monitor], f)

        validation_losses = [valid_model(i) for i in xrange(n_valid_batches)]
        this_validation_loss = np.mean(validation_losses)
        sum_cost /= n_train_batches
        sum_cost_dec /= n_train_batches
        sum_cost_rec /= n_train_batches
        print ' %3i epoch, train error %f, valid error %f, takes %3.2fs, %s' % (
              epoch, sum_cost, this_validation_loss,
              time.clock() - epoch_start_time, filename)
        score.append(sum_cost)
        score_dec.append(sum_cost_dec)
        score_rec.append(sum_cost_rec)
        monitor = [score, score_dec, score_rec]
        if epoch == 0:
            if sum_cost < best_validation_loss:
                best_validation_loss = sum_cost
                osrtdnn.set_cost(best_validation_loss, sum_cost_dec, sum_cost_rec)
                with open(savename + '.pkl', 'wb') as f:
                    pickle.dump([osrtdnn, z_tot, monitor], f)

    end_time = time.clock()
    print 'Optimization complete.'
    print 'Best validation score of %f, with test performance %f' % (best_validation_loss, test_score)
    print >> sys.stderr, 'The code for file ' + os.path.split(__file__)[1] + \
          ' ran for %.2fm' % ((end_time - start_time) / 60.0)
    print savename
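# evaluate_rtdnn above alternates one gradient step on the codes z (train_z_model)
# with one step on the filter/decoder parameters (train_f_model) per minibatch.
# The toy sketch below shows the same alternating scheme for a linear decoder
# x ~ W z under a squared reconstruction error; it is illustrative only, and the
# names here are not part of the model.
def _alternating_fit(x, W, z, lr=0.01, n_steps=100):
    # Alternately take a gradient step on the codes z (decoder fixed) and on the
    # decoder W (codes fixed) for the loss ||x - W z||^2.
    for _ in range(n_steps):
        r = x - W.dot(z)             # residual with the current decoder and codes
        z = z + lr * W.T.dot(r)      # gradient step on the codes
        r = x - W.dot(z)
        W = W + lr * np.outer(r, z)  # gradient step on the decoder
    return W, z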
def fine_tune(savename):
    osrtdnn = cPickle.load(open(savename))[0]
    learning_rate = 0.1
    n_epochs = 10000000
    dataset = 'timit'
    image_w = 2048
    batch_size = 100
    gridx = 10
    gridy = 10
    start = 1
    stop = start + batch_size
    channel = 1
    wavtype = 'timit'
    learning_rule = 'ada'
    slice = 'N'
    mom = 0.96
    postfix = '_z' + str(image_w)
    ind = 1
    savename = os.path.splitext(savename)[0] + postfix

    train_set_x = load_data_timit_seq('test', start, stop, image_w, wavtype, slice)

    # compute number of minibatches for training, validation and testing
    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches = n_train_batches0 / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images

    # Z initialization
    osrtdnn.set_batch_size(n_train_batches0)
    osrtdnn.set_image_w(image_w)
    x_tot_shape = x.reshape((n_train_batches0, channel, 1, image_w))
    z_val = osrtdnn.encode(x_tot_shape)
    z_init = theano.function([x], z_val)
    z_tot = theano.shared(value=z_init(train_set_x.get_value()), borrow=True)

    x_re = x.reshape((batch_size, channel, 1, osrtdnn.layers[0].input_shape[3]))
    cost, cost_dec, cost_rec = osrtdnn.cost(x_re, z_tot[index * batch_size:(index + 1) * batch_size])
    zgrads = T.grad(cost, z_tot)
    zgradsdic = {z_tot: zgrads}

    if learning_rule == 'ada':
        ad = AdaDelta()
        zupdates = ad.get_updates(learning_rate, zgradsdic)
    elif learning_rule == 'con':
        zupdates = []
        for param_i, grad_i in zip(zparams, zgrads):  # note: 'zparams' is undefined in this branch as written
            zupdates.append((param_i, param_i - learning_rate * grad_i))
    elif learning_rule == 'mom':
        momentum = mom
        mm = Momentum(momentum)
        zupdates = mm.get_updates(learning_rate, zgradsdic)
    else:
        raise ValueError('invalid learning_rule')

    train_z_model = theano.function(
        inputs=[index],
        outputs=[cost, cost_dec, cost_rec],
        updates=zupdates,
        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]})

    z_in = T.tensor4()
    decode_out = theano.function([z_in], osrtdnn.decode(z_in))

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    pat_time = np.inf
    first_lr = learning_rate
    st_an = 200
    en_an = 2000
    best_params = None
    best_validation_loss = np.inf
    test_score = 0.

    start_time = time.clock()
    epoch_start_time = 0
    score_cum = []
    score_dec_cum = []
    score_rec_cum = []
    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch_start_time = time.clock()
        epoch = epoch + 1
        if epoch > st_an and learning_rule in ['con', 'mom']:
            learning_rate = first_lr / (epoch - st_an)
        cost_ij = 0
        cost_dec_ij = 0
        cost_rec_ij = 0
        for minibatch_index in xrange(n_train_batches):
            # one update per minibatch (assumed; the original called train_z_model
            # three times here, which applies the updates three times)
            cost_i, cost_dec_i, cost_rec_i = train_z_model(minibatch_index)
            cost_ij += cost_i
            cost_dec_ij += cost_dec_i
            cost_rec_ij += cost_rec_i
        cost_ij /= (2 * n_train_batches)
        cost_dec_ij /= (2 * n_train_batches)
        cost_rec_ij /= (2 * n_train_batches)
        score_cum.append(cost_ij)
        score_dec_cum.append(cost_dec_ij)
        score_rec_cum.append(cost_rec_ij)
        # report the training loss
        print('%3i, training error %.2f, %.2f, %.2f, %.2fs, %s ' % \
              (epoch, cost_ij, cost_dec_ij, cost_rec_ij,
               (time.clock() - epoch_start_time), savename))
        # if we got the best validation score until now
        if (epoch % 50 == 0 and cost_ij < best_validation_loss) or time.clock() - start_time > pat_time:
            best_validation_loss = cost_ij
            z_dec = decode_out(z_tot.get_value())
            grid_plot.grid_plot((train_set_x.get_value(), z_dec))
            #plt.legend('test', 'decoded')
            plt.savefig(savename + '.png')
            plt.close()
            with open(savename + '.pkl', 'wb') as f:
                cPickle.dump([osrtdnn, z_tot, [score_cum, score_dec_cum, score_rec_cum]], f)
            '''
            for i, save_wav in enumerate([z_dec[ind], train_set_x.get_value()[ind]]):
                x_dec_sav = save_wav*_std + _mean
                x_dec_sav = np.asarray(x_dec_sav, dtype=np.int16)
                wavfile.write(os.path.splitext(savename)[0]+'_'+str(ind)+'_'+str(i)+'.wav', 16000, x_dec_sav)
            '''

    end_time = time.clock()
    print('Optimization complete.')
    print savename