###############################################################
# (Re-Define) Architecture: input --> LSTM --> predict one-ahead
###############################################################
x = T.matrix('x')  # the data is presented as a vector of inputs with many exchangeable examples of this vector
x = clip_gradient(x, 1.0)
is_train = T.iscalar('is_train')  # pseudo boolean for switching between training and prediction

rng = numpy.random.RandomState(1234)

# The regression layer gets as input the hidden units of the second LSTM layer
n_hidden = 400
lstm_1 = LSTM(rng, x, n_in=data_set_x.get_value(borrow=True).shape[1], n_out=n_hidden)
lstm_2 = LSTM(rng, lstm_1.output, n_in=n_hidden, n_out=n_hidden - 200)
output = LinearRegression(input=lstm_2.output, n_in=n_hidden - 200,
                          n_out=data_set_x.get_value(borrow=True).shape[1])

################################################
# Load learned params
################################################
f = file(params_file, 'rb')
old_p = cPickle.load(f)
f.close()
lstm_1.W_i.set_value(old_p[0].get_value(), borrow=True)
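# The remaining LSTM gate weights (W_f, W_c, W_o, the recurrent U_* matrices,
# V_o, and the biases b_*) and the regression layer's W and b are presumably
# restored the same way from the subsequent entries of old_p, following the
# full parameter-loading block shown later in this collection.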
                            poolsize=(1, 3), dim2=1)

layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                            image_shape=(1, filter_number_1, song_size - 1, 20),
                            filter_shape=(filter_number_2, filter_number_1, 1, 2),
                            poolsize=(1, 2), dim2=1)

lstm_input = layer1.output.reshape((song_size - 1, 10 * filter_number_2))

# May be worth splitting to different LSTMs... would require smaller filter size
lstm_1 = LSTM(rng, lstm_input, n_in=10 * filter_number_2, n_out=n_hidden)

output = PoissonRegression(input=lstm_1.output, n_in=n_hidden,
                           n_out=responses.get_value(borrow=True).shape[1])

pred = output.E_y_given_x * trial_no
nll = output.negative_log_likelihood(y, trial_no)

################################
# Objective function and GD
################################
print 'defining cost, parameters, and learning function...'

# the cost we minimize during training is the negative log likelihood of
# the model
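# For reference (a hedged sketch, not taken from the PoissonRegression source):
# with predicted rate lambda = E_y_given_x per trial and trial_no trials per
# time bin, the Poisson negative log likelihood per bin is presumably of the form
#   nll = lambda * trial_no - y * log(lambda * trial_no) + const(y)
# so `pred` above is the expected total spike count across trials.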
x = T.matrix('x')  # the data is presented as a vector of inputs with many exchangeable examples of this vector
x = clip_gradient(x, 1.0)
y = T.matrix('y')  # the targets (one-step-ahead values of the input)
is_train = T.iscalar('is_train')  # pseudo boolean for switching between training and prediction

rng = numpy.random.RandomState(1234)

# Architecture: input --> LSTM --> predict one-ahead
lstm_1 = LSTM(rng, x, n_in=data_set_x.get_value(borrow=True).shape[1], n_out=n_hidden)
output = LogisticRegression(input=lstm_1.output, n_in=n_hidden,
                            n_out=data_set_x.get_value(borrow=True).shape[1])

################################
# Objective function and GD
################################
print 'defining cost, parameters, and learning function...'

# the cost we minimize during training is the binary cross-entropy of the model
cost = T.mean(output.cross_entropy_binary(y))
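# Reference (a hedged note; the exact reduction inside cross_entropy_binary is
# assumed): the element-wise binary cross-entropy between target y and the
# predicted probability p = output.p_y_given_x is
#   -(y * log(p) + (1 - y) * log(1 - p))
# and the cost above averages it over time steps.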
def SGD_training(learning_rate=1, n_epochs=1000):
    """ Stochastic gradient descent optimization """

    dataset_info = load_all_data()
    data_set_x = dataset_info[0]
    maxBatchSize = numpy.int_(dataset_info[1])
    batch_size = maxBatchSize
    n_train_batches = 28
    #n_valid_batches = 1
    #n_test_batches = 1

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as a vector of inputs with many exchangeable examples of this vector
    x = clip_gradient(x, 1.0)
    y = T.matrix('y')  # the targets: the input shifted one step ahead
    is_train = T.iscalar('is_train')  # pseudo boolean for switching between training and prediction

    rng = numpy.random.RandomState(1234)

    ################################################
    # Architecture: input --> LSTM --> predict one-ahead
    ################################################
    # The regression layer gets as input the hidden units of the LSTM layer
    d_input = Dropout(rng, is_train, x)
    n_hidden = 100
    lstm_1 = LSTM(rng, d_input.output, n_in=data_set_x.get_value(borrow=True).shape[1], n_out=n_hidden)
    #lstm_1 = RNN(rng, d_input.output, n_in=data_set_x.get_value(borrow=True).shape[1], n_out=n_hidden)  # vanilla rnn
    d_lstm_1 = Dropout(rng, is_train, lstm_1.output)
    output = LinearRegression(input=d_lstm_1.output, n_in=n_hidden,
                              n_out=data_set_x.get_value(borrow=True).shape[1])

    #######################
    # Objective function
    #######################
    print '... defining objective and compiling test and validate'

    # the cost we minimize during training is the negative log likelihood of
    # the model
    cost = T.mean(output.negative_log_likelihood(y))

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    # use cost or errors(y,tc,md) as output?
    test_model = theano.function(
        inputs=[index],
        outputs=[cost, output.E_y_given_x],
        givens={
            x: data_set_x[index * batch_size:((index + 1) * batch_size - 1)],
            y: data_set_x[(index * batch_size + 1):(index + 1) * batch_size],
            is_train: numpy.cast['int32'](0)
        })

    # wanted to use the indexes below and have different sized batches, but this didn't work
    # [int(batchBreaks[index]-1):int(batchBreaks[(index+1)]-1)]
    validate_model = theano.function(
        inputs=[index],
        outputs=cost,
        givens={
            x: data_set_x[index * batch_size:((index + 1) * batch_size - 1)],
            y: data_set_x[(index * batch_size + 1):(index + 1) * batch_size],
            is_train: numpy.cast['int32'](0)
        })

    #######################
    # Parameters and gradients
    #######################
    print '... parameters and gradients'
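    # Note on the givens above: for batch `index`, x is rows
    # [index*batch_size, (index+1)*batch_size - 1) of the data and y is the
    # same block shifted forward by one row, so each y[t] is the next time
    # step of x[t]; this is what makes the model a one-step-ahead predictor.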
    # create a list (concatenated) of all model parameters to be fit by gradient descent
    # order: [self.W, self.b]
    params = lstm_1.params + output.params
    params_helper = lstm_1.params_helper + output.params_helper
    params_helper2 = lstm_1.params_helper2 + output.params_helper2

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = []
    for param in params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = []
    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of the same size, where
    # each element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    #for param, gparam in zip(params, gparams):
    #    updates.append((param, param - learning_rate * gparam))

    #iter_count = theano.shared(1)
    #L1_penalized = []
    #larger_stepsize = []
    #enforce_positive = [2, 3]  # if recurrent
    #enforce_positive = []
    #zero_stepsize = []
    param_index = 0
    #rho = 1e-6
    #for param, param_helper, param_helper2, gparam in zip(params, params_helper, params_helper2, gparams):
    #    updates.append((param_helper, param_helper + gparam ** 2))  # need sum of squares for learning rate
    #    updates.append((param_helper2, param_helper2 + gparam))  # need sum of gradients for L1 thresholding

    # vanilla SGD
    #for param, gparam in zip(params, gparams):
    #    updates.append((param, param - learning_rate * gparam))
    #    param_index += 1

    # adadelta updates
    rho = .95
    eps_big = 1e-6
    for param, param_helper, param_helper2, gparam in zip(params, params_helper, params_helper2, gparams):
        # update decaying sum of previous squared gradients
        updates.append((param_helper, rho * param_helper + (1. - rho) * (gparam ** 2)))
        # calculate step size
        dparam = -T.sqrt((param_helper2 + eps_big) /
                         (rho * param_helper + (1. - rho) * (gparam ** 2) + eps_big)) * gparam
        # update decaying sum of previous step sizes
        updates.append((param_helper2, rho * param_helper2 + (1. - rho) * (dparam ** 2)))
        updates.append((param, param + dparam))
    #updates.append((iter_count, iter_count + 1))

    print '... compiling train'

    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: data_set_x[index * batch_size:((index + 1) * batch_size - 1)],
            y: data_set_x[(index * batch_size + 1):(index + 1) * batch_size],
            is_train: numpy.cast['int32'](0)
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    #patience = train_set_x.get_value(borrow=True).shape[0] * n_epochs  # no early stopping
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.99  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience / 2)
        # go through this many minibatches before checking the network on the
        # validation set; in this case we check every epoch

    #best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    #test_score = 0.
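    # Early-stopping scheme used below: training runs until `patience`
    # iterations have elapsed; whenever the validation loss improves by more
    # than improvement_threshold, patience is extended to
    # iter * patience_increase, so training continues as long as meaningful
    # progress is still being made.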
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            print minibatch_avg_cost

            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute absolute error loss on validation set
                validation_losses = [validate_model(i) for i in [28]]
                this_validation_loss = numpy.mean(validation_losses)  # mean over batches

                print('epoch %i, minibatch %i, validation error %f' %
                      (epoch, minibatch_index + 1, this_validation_loss))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    #test_losses = [test_model(i) for i in [29]]
                    #test_score = numpy.mean(test_losses)
                    test_cost, test_pred = test_model(29)
                    #test_cost, test_costs_separate, test_pred_separate, test_actual_separate = test_model(29)
                    print(('     epoch %i, minibatch %i, test error of '
                           'best model %f') %
                          (epoch, minibatch_index + 1, numpy.sum(test_cost)))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f '
           'obtained at iteration %i, with test performance %f') %
          (best_validation_loss, best_iter + 1, numpy.sum(test_cost)))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    # store data
    f = file('results/params.save', 'wb')
    for obj in [params + [test_cost] + [test_pred]]:
        cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
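# A minimal sketch of reading back the pickled results (assumes the single-dump
# layout written above: one list containing the shared parameter variables
# followed by test_cost and test_pred):
#
#   import cPickle
#   f = open('results/params.save', 'rb')
#   saved = cPickle.load(f)
#   f.close()
#   # saved[:-2] are the parameter variables, saved[-2] is test_cost,
#   # saved[-1] is test_pred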
index = T.lscalar()  # index to a [mini]batch
x = T.matrix('x')  # the data is presented as a vector of inputs with many exchangeable examples of this vector
y = T.imatrix('y')  # integer targets, one row per time step
ahead = T.matrix('ahead')
sent = T.matrix('sentence')
phonemes = T.imatrix('phonemes')

rng = numpy.random.RandomState(1234)

init_reg = LinearRegression(x, 1, 30, True)
lstm_1 = LSTM(rng, init_reg.E_y_given_x, 30, lstm_1_hidden)
lstm_2 = LSTM(rng, lstm_1.output, lstm_1_hidden, lstm_2_hidden)
reg_input = lstm_2.output

# need log_reg and cross covariate layers
log_reg = LogisticRegression(reg_input, lstm_2_hidden, 41)
#lin_reg = LinearRegression(reg_input, lstm_2_hidden, 1, True)

log_reg.reconstruct(log_reg.p_y_given_x)
#lin_reg.reconstruct(lin_reg.E_y_given_x)
#reconstructed_regressions = T.concatenate([log_reg.reconstructed_x, lin_reg.reconstructed_x], axis=1)
def generate_visualization_on_section(region, heldout, neuron_no, training_iterations,
                                      hidden, song_section, previous):

    region_dict = {'L1': 0, 'L2': 2, 'L3': 4, 'NC': 6, 'MLd': 8}

    held_out_song = heldout
    brain_region = region
    brain_region_index = region_dict[brain_region]
    neuron = neuron_no
    prev_trained_size = previous.shape[0]
    n_epochs = training_iterations
    n_hidden = hidden
    song_depth = song_section

    print 'Running CV for held out song ' + str(held_out_song) + ' for brain region ' + brain_region + ' index at ' + str(brain_region_index) + ' iteration: ' + str(song_depth)

    # Filepath for printing results
    results_filename = '/vega/stats/users/sl3368/rnn_code/results/neural/dual_' + str(n_hidden) + '/visualizations/' + brain_region + '_' + str(held_out_song) + '_' + str(neuron) + '.out'

    # check if parameters exist already, then load or not load
    load_params_pr_filename = '/vega/stats/users/sl3368/rnn_code/saves/params/neural/dual_' + str(n_hidden) + '/' + brain_region + '_' + str(held_out_song) + '.save'
    if path.isfile(load_params_pr_filename):
        #print 'Will load previous regression parameters...'
        load_params_pr = True
    else:
        load_params_pr = False

    song_size = 2459

    # filepath for saving parameters
    savefilename = '/vega/stats/users/sl3368/rnn_code/saves/params/neural/dual_' + str(n_hidden) + '/visualizations/' + brain_region + '_' + str(held_out_song) + '_' + str(neuron) + '.visualization'
    if path.isfile(savefilename):
        load_visualize = True
    else:
        load_visualize = False

    ################################################
    # Load Data
    ################################################
    dataset_info = load_all_data()
    stim = dataset_info[0]
    data_set_x = theano.shared(stim, borrow=True)

    #print 'Getting neural data...'
    neural_data = load_neural_data()
    ntrials = theano.shared(neural_data[brain_region_index], borrow=True)
    responses = theano.shared(neural_data[brain_region_index + 1], borrow=True)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    #print 'building the model...'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    section = T.lscalar()

    init = numpy.zeros((song_depth, 60)).astype('f')
    init[:prev_trained_size] = previous
    x = shared(init, borrow=True)

    y = T.matrix('y')  # neural responses for this song section
    trial_no = T.matrix('trial_no')

    rng = numpy.random.RandomState(1234)

    # Architecture: input --> LSTM --> predict one-ahead
    lstm_1 = LSTM(rng, x, n_in=data_set_x.get_value(borrow=True).shape[1], n_out=n_hidden)
    output = PoissonRegression(input=lstm_1.output, n_in=n_hidden, n_out=1)

    pred = output.E_y_given_x.T * trial_no[:, neuron]
    nll = output.negative_log_likelihood(y[:, neuron], trial_no[:, neuron], single=True)

    ################################
    # Objective function and GD
    ################################
    #print 'defining cost, parameters, and learning function...'

    # the cost we minimize during training is the negative log likelihood of
    # the model
    cost = T.mean(nll)

    # Defining params
    params = [x]

    # updates from ADAM
    updates = Adam(cost, params)

    #######################
    # Compile training function
    #######################
    #print 'compiling train....'
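    # Note: the only "parameter" being optimized here is x, the shared input
    # matrix itself (the LSTM and Poisson regression weights are loaded below
    # and held fixed), so Adam performs gradient descent on the stimulus to
    # produce a visualization that minimizes this neuron's negative log
    # likelihood. The Adam() helper's hyperparameters are defined elsewhere
    # in this repo.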
    train_model = theano.function(
        inputs=[index, section],
        outputs=cost,
        updates=updates,
        givens={
            trial_no: ntrials[index * song_size:((index * song_size) + section)],
            y: responses[index * song_size:((index * song_size) + section)]
        })

    #validate_model = theano.function(
    #    inputs=[index, section],
    #    outputs=[cost, nll.shape, pred.shape],
    #    givens={
    #        trial_no: ntrials[index * song_size:((index * song_size) + section)],
    #        y: responses[index * song_size:((index * song_size) + section)]})

    #######################
    # Parameters and gradients
    #######################
    #print 'parameters and gradients...'

    if load_params_pr:
        #print 'loading LSTM parameters from file...'
        f = open(load_params_pr_filename)
        old_p = cPickle.load(f)
        lstm_1.W_i.set_value(old_p[0].get_value(), borrow=True)
        lstm_1.W_f.set_value(old_p[1].get_value(), borrow=True)
        lstm_1.W_c.set_value(old_p[2].get_value(), borrow=True)
        lstm_1.W_o.set_value(old_p[3].get_value(), borrow=True)
        lstm_1.U_i.set_value(old_p[4].get_value(), borrow=True)
        lstm_1.U_f.set_value(old_p[5].get_value(), borrow=True)
        lstm_1.U_c.set_value(old_p[6].get_value(), borrow=True)
        lstm_1.U_o.set_value(old_p[7].get_value(), borrow=True)
        lstm_1.V_o.set_value(old_p[8].get_value(), borrow=True)
        lstm_1.b_i.set_value(old_p[9].get_value(), borrow=True)
        lstm_1.b_f.set_value(old_p[10].get_value(), borrow=True)
        lstm_1.b_c.set_value(old_p[11].get_value(), borrow=True)
        lstm_1.b_o.set_value(old_p[12].get_value(), borrow=True)
        f.close()

    if load_params_pr:
        #print 'loading PR parameters from file...'
        f = open(load_params_pr_filename)
        old_p = cPickle.load(f)
        output.W.set_value(old_p[13].get_value(), borrow=True)
        output.b.set_value(old_p[14].get_value(), borrow=True)
        f.close()

    #if load_visualize:
    #    print 'loading visualization from file...'
    #    f = open(savefilename)
    #    old_v = cPickle.load(f)
    #    x.set_value(old_v, borrow=True)
    #    f.close()

    ###############
    # TRAIN MODEL #
    ###############
    #print 'training...'

    best_validation_loss = numpy.inf
    epoch = 0
    last_e = time.time()

    r_log = open(results_filename, 'a')
    r_log.write('Starting training...\n')
    r_log.close()

    while (epoch < n_epochs):
        #print str(epoch) + ' epoch took: ' + str(time.time() - last_e)
        #r_log = open(results_filename, 'a')
        #r_log.write(str(epoch) + ' epoch took: ' + str(time.time() - last_e) + '\n')
        #r_log.close()
        last_e = time.time()
        epoch = epoch + 1

        mb_costs = []
        heldout = 0

        # only the held-out song is trained on (the visualization is fit to it)
        for minibatch_index in xrange(14):
            if heldout == held_out_song:
                minibatch_avg_cost = train_model(minibatch_index, song_depth)
                #print minibatch_avg_cost
                mb_costs.append(minibatch_avg_cost)
            heldout = heldout + 1

        for minibatch_index in xrange(24, 30):
            if heldout == held_out_song:
                minibatch_avg_cost = train_model(minibatch_index, song_depth)
                #print minibatch_avg_cost
                mb_costs.append(minibatch_avg_cost)
            heldout = heldout + 1

        avg_cost = numpy.mean(mb_costs)

        # if we got the best cost until now, save the current visualization
        if avg_cost < best_validation_loss:
            best_validation_loss = avg_cost
            visualization = numpy.asarray(x.eval())

            # store data
            f = file(savefilename, 'wb')
            for obj in [visualization]:
                cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()

        print('epoch %i, training error %f' % (epoch, avg_cost))
        r_log = open(results_filename, 'a')
        r_log.write('epoch %i, training error %f\n' % (epoch, avg_cost))
        r_log.close()

    return numpy.asarray(x.eval())
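# Hedged usage sketch (argument values are illustrative only; the brain region
# key must exist in region_dict and `hidden` must match a saved parameter set):
#
#   previous = numpy.zeros((0, 60), dtype='float32')
#   vis = generate_visualization_on_section(region='MLd', heldout=3, neuron_no=0,
#                                           training_iterations=50, hidden=100,
#                                           song_section=500, previous=previous)
#   # `vis` is the optimized (song_section x 60) stimulus array, which is also
#   # pickled to the .visualization file above whenever the cost improves.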