import os
import sys
import time
import pickle

import numpy as np
import matplotlib
matplotlib.use('Agg')  # non-interactive backend (assumed; plots are only saved to disk)
import matplotlib.pyplot as plt

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

# Project-local classes. The exact module paths are assumed here; only
# code.utils.load_data appears explicitly in this file.
from code.models.rtdnn import rtdnn
from code.utils.learning_rule import AdaDelta, Momentum


def evaluate_rtdnn():
    # Hyperparameters
    learning_rate = 0.1
    n_epochs = 10000
    dataset = 'timit'
    batch_size = 10
    start = 0
    stop = start + batch_size
    channel = 1
    image_h = 1
    image_w = 256
    filter_h = 1
    filter_w = 4
    nkerns = [4]
    wavtype = 'timit'
    learning_rule = 'mom'
    mom = 0.96
    dechid = 'tanh'
    postfix = ''

    savepath = '/data/lisa/exp/kimtaeho/speech_synthesis/rtdnn/result/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    filename = 'osrtdnn' + \
        '_toy' + wavtype + \
        '_w' + str(filter_w) + \
        '_' + learning_rule + \
        '_' + dechid + \
        postfix
    savename = savepath + filename
    if os.path.exists(savename + '.pkl'):
        ans = raw_input('Same exp. exists, continue? ([Y]/N) ')
        if ans.upper() == 'N':
            return

    nrng = np.random.RandomState(23455)
    trng = RandomStreams(nrng.randint(2 ** 30))

    if dataset == 'mnist.pkl.gz':
        from code.utils.load_data import load_data_mnist
        datasets = load_data_mnist(dataset)
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
    elif dataset == 'timit':
        from code.utils.load_data import load_data_timit_seq
        train_set_x = load_data_timit_seq('train', start, stop, image_w, wavtype)
        valid_set_x = load_data_timit_seq('valid', start, stop, image_w, wavtype)
        test_set_x = load_data_timit_seq('test', start, stop, image_w, wavtype)

    # compute number of minibatches for training, validation and testing
    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches0 = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches0 = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches = n_train_batches0 / batch_size
    n_valid_batches = n_valid_batches0 / batch_size
    n_test_batches = n_test_batches0 / batch_size
    assert min(n_train_batches, n_valid_batches, n_test_batches) > 0, \
        'Maximum batch size is %d' % min(n_train_batches0,
                                         n_valid_batches0,
                                         n_test_batches0)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    osrtdnn_input = x.reshape((channel, image_h, image_w, batch_size))

    input_shape = []
    filter_shape = []
    for i in xrange(len(nkerns)):
        if i == 0:
            input_shape.append((channel, image_h,
                                image_w / (2 ** len(nkerns)), batch_size))
            filter_shape.append((channel, filter_h, filter_w, nkerns[0]))
        else:
            input_shape.append((nkerns[i - 1], image_h,
                                image_w / (2 ** (len(nkerns) - i)), batch_size))
            filter_shape.append((nkerns[i - 1], filter_h, filter_w, nkerns[i]))

    osrtdnn = rtdnn(nrng=nrng, trng=trng,
                    input_shape=input_shape,
                    filter_shape=filter_shape,
                    dec_hid=dechid)
    cost = osrtdnn.cost(osrtdnn_input)
    params = osrtdnn.params
    grads = T.grad(cost, params)
    gradsdic = dict(zip(params, grads))

    if learning_rule == 'ada':
        ad = AdaDelta()
        updates = ad.get_updates(learning_rate, gradsdic)
    elif learning_rule == 'con':
        # plain constant-rate gradient descent
        updates = []
        for param_i, grad_i in zip(params, grads):
            updates.append((param_i, param_i - learning_rate * grad_i))
    elif learning_rule == 'mom':
        momentum = mom
        mm = Momentum(momentum)
        updates = mm.get_updates(learning_rate, gradsdic)
    else:
        raise ValueError('invalid learning_rule')

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]})
    validate_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={x: valid_set_x[index * batch_size: (index + 1) * batch_size]})
    test_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={x: test_set_x[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000               # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    first_lr = learning_rate
    st_an = 800    # epoch at which learning-rate annealing starts
    en_an = 2000

    best_params = None
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()
    score_cum = []

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        # anneal the learning rate as 1/t after st_an epochs
        if epoch > st_an and learning_rule in ['con', 'mom']:
            learning_rate = first_lr / (epoch - st_an)
        #if epoch >= st_an and epoch < en_an:
        #    learning_rate -= first_lr/(en_an-st_an)
        #elif epoch >= en_an:
        #    learning_rate = 0.

        for minibatch_index in xrange(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 1000 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:
                # compute the cost on the validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print(' %3i, validation error %f, %s ' %
                      (epoch, this_validation_loss, filename))
                score_cum.append(this_validation_loss)
                plt.plot(xrange(len(score_cum)), score_cum)
                plt.savefig(savename + '.png')
                plt.close()

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    osrtdnn.set_cost(best_validation_loss)
                    with open(savename + '.pkl', 'wb') as f:
                        pickle.dump(osrtdnn, f)
                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print(' test error %f' % test_score)

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f obtained at iteration %i, '
          'with test performance %f' %
          (best_validation_loss, best_iter + 1, test_score))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    print savename
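# ---------------------------------------------------------------------------
# The Momentum class used above is not defined in this file. The sketch
# below is a minimal, illustrative implementation of the classical momentum
# rule with the same get_updates(learning_rate, gradsdic) interface; the
# name MomentumSketch and its internals are assumptions, not the project's
# actual code. Learning rate and momentum live in Theano shared variables
# behind properties, so the training loop's plain attribute writes
# (mm.learning_rate = ..., mm.momentum = ...) take effect inside the
# already-compiled Theano functions.
# ---------------------------------------------------------------------------
class MomentumSketch(object):

    def __init__(self, momentum):
        self._momentum = theano.shared(np.float64(momentum), name='momentum')
        self._lr = theano.shared(np.float64(0.0), name='lr')

    @property
    def momentum(self):
        return self._momentum.get_value()

    @momentum.setter
    def momentum(self, value):
        self._momentum.set_value(np.float64(value))

    @property
    def learning_rate(self):
        return self._lr.get_value()

    @learning_rate.setter
    def learning_rate(self, value):
        self._lr.set_value(np.float64(value))

    def get_updates(self, learning_rate, gradsdic):
        self._lr.set_value(np.float64(learning_rate))
        updates = []
        for param, grad in gradsdic.iteritems():
            # one velocity buffer per parameter:
            # v <- momentum * v - lr * grad;  param <- param + v
            vel = theano.shared(np.zeros_like(param.get_value()))
            new_vel = T.cast(self._momentum * vel - self._lr * grad,
                             vel.dtype)  # keep dtype stable for float32 params
            updates.append((vel, new_vel))
            updates.append((param, param + new_vel))
        return updates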
# NOTE: this second definition shadows the evaluate_rtdnn() above; it is the
# revised version of the experiment (strided layers, alternating z/f updates).
def evaluate_rtdnn():
    # Not so important
    n_epochs = 500
    dataset = 'timit'
    channel = 1
    image_h = 1
    filter_h = 1
    # Data type: 'timit', 'sin', 'rect', ...
    wavtype = 'timit'
    # Control data size and batch size
    start = 0
    stop = None
    valid_stop = None
    segment = 'Y'
    batch_size = 256
    # Learning rule and rate
    learning_rule = 'ada'
    learning_rate = 0.1
    mom = 0.96
    mom_an = 0    # epochs over which momentum is ramped up
    st_an = 100   # epoch at which learning-rate annealing starts
    # Control each layer's characteristics
    image_w = 256
    dechid = ['lin', 'tanh', 'tanh']
    nkerns = [40, 40, 4]
    stride = [2, 2, 2]
    filter_w = 25
    postfix = ''
    if len(nkerns) != len(dechid) or len(nkerns) != len(stride):
        raise ValueError('nkerns, dechid, and stride must have the same length')

    savepath = '/data/lisa/exp/kimtaeho/speech_synthesis/rtdnn/result/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    filename = time.strftime("%m%d") + '_tdnn_' + wavtype + \
        '_' + str(batch_size) + \
        '_' + str(stop) + \
        '_' + str(image_w) + \
        '_' + str(filter_w) + \
        '_' + str(nkerns[-1]) + ':' + str(np.prod(stride)) + \
        '_' + learning_rule + \
        '_' + str(learning_rate) + \
        '_' + dechid[-1] \
        + postfix
    savename = savepath + filename
    print savename
    if os.path.exists(savename + '.pkl'):
        ans = raw_input('Same exp. exists, continue? ([Y]/N) ')
        if ans.upper() == 'N':
            return

    nrng = np.random.RandomState(23455)

    if dataset == 'mnist.pkl.gz':
        from code.utils.load_data import load_data_mnist
        datasets = load_data_mnist(dataset)
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
    elif dataset == 'timit':
        from code.utils.load_data import load_data_timit_seq
        train_set_x = load_data_timit_seq('train', start, stop,
                                          image_w, wavtype, segment)
        valid_set_x = load_data_timit_seq('valid', start, valid_stop,
                                          image_w, wavtype, segment)

    n_train_batches0 = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches0 = valid_set_x.get_value(borrow=True).shape[0]
    if batch_size > min(n_train_batches0, n_valid_batches0):
        print 'Maximum batch size is %d' % min(n_train_batches0,
                                               n_valid_batches0)
        batch_size = min(n_train_batches0, n_valid_batches0)
    n_train_batches = n_train_batches0 / batch_size
    n_valid_batches = n_valid_batches0 / batch_size

    index = T.lscalar()
    x = T.matrix('x')

    print '... building the model'
    x_re = x.reshape((batch_size, channel, image_h, image_w))

    input_shape = []
    filter_shape = []
    for i in xrange(len(nkerns)):
        if i == 0:
            filter_shape.append((nkerns[0], channel, filter_h, filter_w))
            input_shape.append((batch_size, channel, image_h, image_w))
        else:
            filter_shape.append((nkerns[i], nkerns[i - 1], filter_h, filter_w))
            # stride should be changed
            input_shape.append((batch_size, nkerns[i - 1], image_h,
                                image_w / 2 ** i))

    osrtdnn = rtdnn(nrng=nrng,
                    input_shape=input_shape,
                    filter_shape=filter_shape,
                    stride=stride,
                    dec_hid=dechid)

    # Initialization of the hidden representation z over the whole training set
    osrtdnn.set_batch_size(n_train_batches0)
    x_tot_shape = x.reshape((n_train_batches0, channel, image_h, image_w))
    z_val = osrtdnn.encode(x_tot_shape)
    z_init = theano.function([x], z_val)
    z_tot = theano.shared(value=z_init(train_set_x.get_value()), borrow=True)
    osrtdnn.set_batch_size()  # restore the minibatch size

    cost, cost_dec, cost_rec = osrtdnn.cost(
        x_re, z_tot[index * batch_size:(index + 1) * batch_size])

    # separate updates for the filter parameters (f) and the codes (z)
    fparams = osrtdnn.fparams
    fgrads = T.grad(cost, fparams)
    fgradsdic = dict(zip(fparams, fgrads))
    zgrads = T.grad(cost, z_tot)
    zgradsdic = {z_tot: zgrads}

    if learning_rule == 'ada':
        ad = AdaDelta()
        fupdates = ad.get_updates(learning_rate, fgradsdic)
        zupdates = ad.get_updates(learning_rate, zgradsdic)
    elif learning_rule == 'con':
        fupdates = []
        for param_i, grad_i in zip(fparams, fgrads):
            fupdates.append((param_i, param_i - learning_rate * grad_i))
        # the original omitted the learning rate in the z update
        zupdates = {z_tot: z_tot - learning_rate * zgrads}
    elif learning_rule == 'mom':
        momentum = mom
        mm = Momentum(momentum)
        fupdates = mm.get_updates(learning_rate, fgradsdic)
        zupdates = mm.get_updates(learning_rate, zgradsdic)
    else:
        raise ValueError('invalid learning_rule')

    train_z_model = theano.function(
        inputs=[index],
        outputs=[cost, cost_dec, cost_rec],
        updates=zupdates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})
    train_f_model = theano.function(
        inputs=[index],
        outputs=[cost, cost_dec, cost_rec],
        updates=fupdates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})
    valid_model = theano.function(
        inputs=[index],
        outputs=[cost, cost_dec, cost_rec],
        # the original indexed train_set_x here; validation must read the
        # validation set
        givens={x: valid_set_x[index * batch_size:(index + 1) * batch_size]})

    print '... training'
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    # np.inf disables the per-iteration progress reports below
    train_check_freq = np.inf
    valid_check_freq = np.inf  # min(n_train_batches, patience / 2)
    first_lr = learning_rate
    en_an = 2000

    best_params = None
    best_validation_loss = np.inf
    test_score = 0.0
    start_time = time.clock()
    valid_time = time.clock()
    score = []
    score_dec = []
    score_rec = []
    monitor = []

    epoch = 0
    done_looping = False
    while epoch < n_epochs and not done_looping:
        epoch_start_time = time.clock()
        epoch = epoch + 1
        sum_cost = 0
        sum_cost_dec = 0
        sum_cost_rec = 0
        # 1/t learning-rate annealing after st_an epochs
        if epoch > st_an and learning_rule in ('con', 'mom'):
            learning_rate = first_lr / (epoch - st_an)
            if learning_rule == 'mom':
                mm.learning_rate = first_lr / (epoch - st_an)
                print mm.learning_rate
        if learning_rule == 'mom':
            # momentum ramp-up over the first mom_an epochs; guarded so the
            # default 'ada' run does not hit an undefined mm
            if epoch == 0:
                mm.momentum = 0
            elif epoch < mom_an:
                mm.momentum = mom / (mom_an - epoch)  # 'e' was undefined here
            else:
                mm.momentum = mom

        for minibatch_index in xrange(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            # alternate between updating the codes z and the filters f
            cost_z, cost_dec_z, cost_rec_z = train_z_model(minibatch_index)
            cost_f, cost_dec_f, cost_rec_f = train_f_model(minibatch_index)
            sum_cost += cost_f + cost_z
            sum_cost_dec += cost_dec_f + cost_dec_z
            sum_cost_rec += cost_rec_f + cost_rec_z  # was accumulating cost_dec twice

            if (iter + 1) % train_check_freq == 0:
                print 'training @ iter = %i, time = %3.2fs, training cost = %f, %f, %s' % \
                    (iter + 1, time.clock() - start_time, cost_f, cost_z, filename)
                start_time = time.clock()
            if (iter + 1) % valid_check_freq == 0:
                validation_losses = [valid_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print 'valid @ iter = %i, time = %3.2fs, valid cost %f, %s ' % \
                    (iter + 1, time.clock() - valid_time,
                     this_validation_loss, filename)
                valid_time = time.clock()
                # if we got the best validation score until now
                #if this_validation_loss < best_validation_loss:
                #    best_validation_loss = this_validation_loss
                #    best_iter = iter
                #    with open(savename+'.pkl', 'wb') as f:
                #        pickle.dump([osrtdnn, z_tot, monitor], f)

        validation_losses = [valid_model(i) for i in xrange(n_valid_batches)]
        this_validation_loss = np.mean(validation_losses)
        sum_cost /= n_train_batches
        sum_cost_dec /= n_train_batches
        sum_cost_rec /= n_train_batches
        print ' %3i epoch, train error %f, valid error %f, takes %3.2fs, %s' % \
            (epoch, sum_cost, this_validation_loss,
             time.clock() - epoch_start_time, filename)
        score.append(sum_cost)
        score_dec.append(sum_cost_dec)
        score_rec.append(sum_cost_rec)
        monitor = [score, score_dec, score_rec]

        # epoch starts at 1, so the original `if epoch == 0:` guard made this
        # checkpoint unreachable; save whenever the training cost improves
        if sum_cost < best_validation_loss:
            best_validation_loss = sum_cost
            osrtdnn.set_cost(best_validation_loss, sum_cost_dec, sum_cost_rec)
            with open(savename + '.pkl', 'wb') as f:
                pickle.dump([osrtdnn, z_tot, monitor], f)

    end_time = time.clock()
    print 'Optimization complete.'
    print 'Best validation score of %f, with test performance %f' % \
        (best_validation_loss, test_score)
    print >> sys.stderr, 'The code for file ' + os.path.split(__file__)[1] + \
        ' ran for %.2fm' % ((end_time - start_time) / 60.0)
    print savename
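# ---------------------------------------------------------------------------
# Likewise, the AdaDelta class is not shown in this file. Below is a minimal
# sketch of the rule from Zeiler (2012), "ADADELTA: an adaptive learning
# rate method", with the get_updates(learning_rate, gradsdic) interface used
# above. The name AdaDeltaSketch, the choice to treat learning_rate as a
# global scale factor, and the rho/eps defaults are assumptions about the
# project's actual class.
# ---------------------------------------------------------------------------
class AdaDeltaSketch(object):

    def __init__(self, rho=0.95, eps=1e-6):
        self.rho = rho  # decay rate of the running averages
        self.eps = eps  # numerical stabilizer

    def get_updates(self, learning_rate, gradsdic):
        updates = []
        for param, grad in gradsdic.iteritems():
            zeros = np.zeros_like(param.get_value())
            acc_g2 = theano.shared(zeros.copy())  # running E[g^2]
            acc_d2 = theano.shared(zeros.copy())  # running E[dx^2]
            new_g2 = T.cast(self.rho * acc_g2
                            + (1. - self.rho) * grad ** 2, acc_g2.dtype)
            # RMS-scaled step: sqrt(E[dx^2]+eps) / sqrt(E[g^2]+eps) * g
            step = T.sqrt(acc_d2 + self.eps) / T.sqrt(new_g2 + self.eps) * grad
            new_d2 = T.cast(self.rho * acc_d2
                            + (1. - self.rho) * step ** 2, acc_d2.dtype)
            updates.append((acc_g2, new_g2))
            updates.append((acc_d2, new_d2))
            updates.append((param, T.cast(param - learning_rate * step,
                                          acc_g2.dtype)))
        return updates


# Entry point (assumed; the original file may be driven differently):
if __name__ == '__main__':
    evaluate_rtdnn()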