def output(self, use_dropout=False, depth=0): """ Provides data to next layer and applies dropout """ ret = self.input_layer if use_dropout: num_str = NNl.get_num_streams(np.prod(self.out_shape)) mask = NNl.gen_mask(self.srng, self.out_shape, self.p_retain, num_str) ret *= mask / self.p_retain return ret
def output(self, use_dropout=False, depth=0): """ Apply the activation and dropout to the signal, producing output that will be used by subsequent layers """ out = self.activation(self.signal(use_dropout=use_dropout, depth=depth)) c_shape = self.out_shape if use_dropout: num_str = NNl.get_num_streams(np.prod(c_shape)) mask = NNl.gen_mask(self.srng, c_shape, self.p_retain, num_str) out = out * mask / self.p_retain return out
def class_probs(model, data, train_params):
    """ Creates a Theano function to return the probabilities of class
    membership.

    Args:
        model: Model instance
        data: Dataset instance whose validation set (data.V) supplies the
            samples to evaluate (note: batches of size data.batch_n)
        train_params: dictionary of training parameters; 'v_batches' gives
            how many batches are required to span the samples

    Returns:
        An ndarray of class membership probabilities. Useful for
        performing meta-analysis/transfer learning.
    """
    index = T.lscalar('index')  # index to a [mini]batch
    aug = T.ivector('aug')  # data augmentation (jitter, flip)
    in_dims = model.x_shape[2:]

    p_func = theano.function([index, aug], model.out_layer.p_y_given_x(False),
            givens={model.x: NNl.get_batch(data.V[0], index, data.batch_n,
                                           in_dims, aug)})

    y = [p_func(i, model.ref_aug)
         for i in range(train_params['v_batches'] + 1)]
    return np.asarray(np.concatenate(y, axis=0))
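# Typical use (a hedged sketch; 'mymodel', 'data', and 'train_params' are the
# objects assembled in train_NN later in this section):
#
#   probs = class_probs(mymodel, data, train_params)
#   # probs is roughly (n_validation_samples, n_classes); each row is a
#   # probability distribution over class labels for one sample.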
def __init__(self, rngs, input_layer, Lshape, traits, activation):
    super(ConvLayer, self).__init__(input_layer, traits, "Conv")
    self.rng = rngs[0]
    self.l2decay = traits['l2decay']
    filter_shape = Lshape[1]
    # The number of input channels must match the number of filter channels
    assert Lshape[0][1] == filter_shape[1]
    self.pad = traits['padding']

    self.W = NNl.gen_weights(self.rng, filter_shape, 0, traits['initW'])

    # convolve input feature maps with filters
    # Using Alex K.'s fast CUDA conv, courtesy of S. Dieleman
    self.x = self.input_layer.output(False)
    conv_op = FilterActs(pad=self.pad, partial_sum=1)
    input_shuffled = (self.x).dimshuffle(1, 2, 3, 0)  # bc01 to c01b
    filters_shuffled = (self.W).dimshuffle(1, 2, 3, 0)  # bc01 to c01b
    contiguous_input = gpu_contiguous(input_shuffled)
    contiguous_filters = gpu_contiguous(filters_shuffled)
    out_shuffled = conv_op(contiguous_input, contiguous_filters)
    self.conv_out = out_shuffled.dimshuffle(3, 0, 1, 2)  # c01b to bc01

    # store parameters of this layer
    self.params = [self.W]
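# A quick NumPy check (illustrative only) of the bc01 <-> c01b shuffles used
# above; FilterActs wants channel-major c01b layout, while the rest of the
# model works in Theano's batch-major bc01:
#
#   import numpy as np
#   x = np.zeros((128, 3, 32, 32))     # bc01: (batch, channel, row, col)
#   x_c01b = x.transpose(1, 2, 3, 0)   # c01b: (channel, row, col, batch)
#   assert x_c01b.shape == (3, 32, 32, 128)
#   assert x_c01b.transpose(3, 0, 1, 2).shape == x.shape  # back to bc01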
def __init__(self, rngs, input_layer, Lshape, traits, activation):
    super(OutputLayer, self).__init__(input_layer, traits, "Output")
    self.out_shape = (Lshape[0], Lshape[1])
    self.W_shape = Lshape[1:]
    self.activation = activation
    self.l2decay = traits['l2decay']

    if len(Lshape) != 3:
        print "Output layer shape must have 3 elements, it is", Lshape

    # Initialize weights and biases (can load values later)
    self.W = NNl.gen_weights(rngs[0], self.W_shape, 0, traits['initW'])
    self.b = Tsh(np.zeros((Lshape[2],), dtype=Tfloat))
    self.params = [self.W, self.b]
def create_functions(model, data, rho, profmode):
    """Creates Theano functions for SGD backprop.

    Args:
        model: Model instance to train.
        data: Dataset instance on which to train.
        rho: momentum parameter
        profmode: used only for profiling
    """
    print 'Compiling functions...',

    # allocate symbolic variables for the data
    index = T.lscalar('index')  # index to a [mini]batch
    lrate = T.fscalar('lrate')  # learning rate
    aug = T.ivector('aug')  # data augmentation (jitter, flip)
    in_dims = model.x_shape[2:]
    x = model.x
    y = model.y

    # Functions to calculate our training and validation error while we run
    functions = {}
    f_input = [index, aug]
    functions['train_E'] = theano.function(f_input, model.val_error,
            givens={
                x: NNl.get_batch(data.T[0], index, data.batch_n, in_dims, aug),
                y: NNl.get_batch_0(data.T[1], index, data.batch_n)},
            mode=profmode)
    if not data.test:
        functions['val_E'] = theano.function(f_input, model.val_error,
                givens={
                    x: NNl.get_batch(data.V[0], index, data.batch_n,
                                     in_dims, aug),
                    y: NNl.get_batch_0(data.V[1], index, data.batch_n)},
                mode=profmode)

    train_input = [index, lrate, aug]

    # Functions to update the model, via momentum-based SGD
    mom_updates = []
    p_updates = []
    for layer in model.layers[-1:0:-1]:
        grads = T.grad(model.cost, layer.params)
        for grad_i, param_i in zip(grads, layer.params):
            delta_i = theano.shared(param_i.get_value() * 0.)
            c_update = (delta_i, rho * delta_i - lrate * grad_i
                        - lrate * layer.l2decay * param_i)
            mom_updates.append(c_update)
            p_updates.append((param_i, param_i + delta_i))

    functions['momentum'] = theano.function(train_input, model.cost,
            updates=mom_updates,
            givens={
                x: NNl.get_batch(data.T[0], index, data.batch_n, in_dims, aug),
                y: NNl.get_batch_0(data.T[1], index, data.batch_n)},
            mode=profmode)
    functions['update'] = theano.function([], updates=p_updates, mode=profmode)

    print 'done'
    return functions
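# A plain-NumPy sketch (illustrative only) of the update rule compiled above:
# 'momentum' accumulates delta <- rho*delta - LR*grad - LR*l2decay*param,
# then 'update' applies param <- param + delta.

def momentum_step(param, delta, grad, lrate, rho, l2decay):
    """One SGD step with momentum and L2 weight decay (hypothetical helper,
    mirroring mom_updates/p_updates above)."""
    delta = rho * delta - lrate * grad - lrate * l2decay * param  # 'momentum'
    param = param + delta                                         # 'update'
    return param, delta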
def istrazuj(self):
    SB = self.SBSpinBox.value()
    UZ = self.UZSpinBox.value()

    # - determine the concept from the values returned by the NN
    #   ('zid' = wall, 'prazan prostor' = empty space)
    if NNlib.neuro(SB, UZ) == 1:
        koncept = 'zid'
    else:
        koncept = 'prazan prostor'

    # - load the concept into a session variable
    sql = "SET @koncept:='%s';" % koncept
    self.cursor.execute(sql)
    self.conn.commit()

    # Add the values and coordinates to the database
    # - load the current direction to compute the coordinates of the
    #   field ahead
    self.cursor.execute("SELECT smjer, poljeID FROM stanje;")
    smjer, t_polje = self.cursor.fetchone()

    # - compute and store the values of the field ahead
    self.cursor.execute("CALL spremi_polje_ispred(%s)", smjer)

    # - add the field's values to the surroundings
    self.cursor.execute("SELECT poljeID, tezina, d_tezina FROM polje "
                        "WHERE xkoord = @pi_x AND ykoord = @pi_y")
    poljeID, tezina, d_tezina = self.cursor.fetchone()
    self.okruzenje[int(poljeID)] = int(tezina)
    self.cursor.execute("CALL dodajVezu('%s','veza', '%s', %s);",
                        (t_polje, poljeID, d_tezina))
    self.conn.commit()

    # - load the coordinates of the field ahead
    self.cursor.execute("SELECT @pi_x, @pi_y;")
    pi_x, pi_y = self.cursor.fetchone()

    # - store the t_smjer value of the field ahead
    self.cursor.execute("UPDATE polje SET t_smjer = "
                        "(SELECT smjer FROM stanje) "
                        "WHERE xkoord = %s AND ykoord = %s;", (pi_x, pi_y))
    self.conn.commit()

    self.unesenaVrijednost.setText(("<font color='green'>%s saved at field "
            "(%s, %s)</font>") % (koncept, pi_x, pi_y))
    self.promjenaPozicije.setText(
            "<font color='green'>Turning right 90 degrees</font>")

    # Turn 90 degrees to the right; 90 is added to the direction, and once
    # it passes 250 it wraps back to 0
    self.cursor.execute("UPDATE stanje SET smjer = "
                        "IF(smjer < 250, smjer + 90, 0);")
    self.conn.commit()

    self.brojac += 1
    if self.brojac == 4:
        self.brojac = 0
        self.master_istrazuj()
def __init__(self, rngs, input_layer, Lshape, traits, activation):
    super(FCLayer, self).__init__(input_layer, traits, "FC")
    self.p_retain = (1. - traits['dropout'])
    self.rng = rngs[0]
    self.srng = rngs[1]
    self.out_shape = (Lshape[0], Lshape[2])
    self.W_shape = Lshape[1:]
    self.activation = activation
    self.l2decay = traits['l2decay']
    self.d_rec = input_layer.output(False)
    self.best_error = np.inf

    if len(Lshape) != 3:
        print "FC layer shape must have 3 elements, it is", Lshape

    self.W = NNl.gen_weights(self.rng, self.W_shape, 0, traits['initW'])
    self.b = Tsh(np.zeros(Lshape[2], dtype=Tfloat))
    self.ib = Tsh(np.zeros(Lshape[1], dtype=Tfloat))
    self.params = [self.W, self.b]
    self.pt_params = [self.W, self.b, self.ib]
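# Shape convention for FC layers, inferred from the assignments above
# (illustrative, not stated elsewhere in the source):
#   Lshape    = (batch_size, n_in, n_out)
#   W_shape   = (n_in, n_out)
#   out_shape = (batch_size, n_out)
#   b has length n_out; ib has length n_in and only enters pt_params,
#   i.e. it is used for the autoencoder reconstruction pass.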
def train_NN(CPFile="", datafile="rawdata", dataset=None, SFile="structure", train_ae=False, profiling=False, rho=0.9, LR=0.010, n_epochs=500, batch_size=128, cut=-1, cv_k=10, seed=1000, predict=False, verbose=True): """The core routine for neural net training. Args: CPFile: Checkpoint file from which to resume a training run. Checkpoints are saved automatically as progress is made on a validation set, with standard filename "model_cp" datafile: File from which to retrieve raw data. This is subsequently loaded into a Dataset instance. dataset: Specifies a dataset to load directly. For use with training meta-algorithms that modify the dataset over multiple runs. SFile: The structure file that specifies the neural net architecture. train_ae: Flag for training as an autoencoder. profiling: Flag for turning on profiling for examining performance. rho: Momentum parameter. Standard momentum is used by default for both autoencoder and backprop training. LR: Learning rate. n_epochs: Number of epochs for training. batch_size: SGD mini-batch size. Processing speed increases for larger sizes, but fewer updates are made as a tradeoff. cut: Number of training examples to use from the raw data, with the rest as validation. '-1' indicates look at cv_k. cv_k: 'k' in K-fold validation. 1/k of the data used as a validation set, with the rest as training. seed: specifies random seed. For a given seed, dataset, and neural net architecture, the run will be repeatable. verbose: Flag determining whether to send continual updates to stdout. """ sched_dict = {20: 0.005, 100: 0.001, 200: 0.0001} # This is passed to the theano functions for profiling profmode = NNl.get_profiler(profiling) # A dictionary collecting the necessary training parameters train_params = { 'LR': LR, 'n_epochs': n_epochs, 'rho': rho, 'verb': verbose, 'LRsched': sched_dict } # Create RNGs, one normal one Theano, which are passed to the Builder rng = np.random.RandomState(seed) theano_rng = MRG_RStreams(rng.randint(999999)) rngs = [rng, theano_rng] # Load the dataset, then split for validation if dataset: data = dataset if not data.T: train_params.update( data.prep_validation(batch=batch_size, cut=cut, k=cv_k)) else: train_params.update(data.V_params) else: data = Dataset(datafile, rng) if predict: cv_k = 1 train_params.update( data.prep_validation(batch=batch_size, cut=cut, k=cv_k)) #*** CREATE A MODEL CLASS INSTANCE ***# in_shape = (batch_size, ) + data.sample_dim # Load the checkpoint if there, otherwise use 'structure' to define network if os.path.isfile(CPFile): mymodel = Model(rngs, in_shape, data.label_dim, CPFile, struc_file="") else: mymodel = Model(rngs, in_shape, data.label_dim, struc_file=SFile) if mymodel.zeropad > 0: data.zeropad(mymodel.zeropad) #*** AUTOENCODER ***# #___________________# layers_to_train = [] if train_ae: for layer in mymodel.layers: if layer.tag == "FC": layers_to_train.append(layer) for layer in layers_to_train: if layer.input_layer.tag == "Input": print "@ Autoencoding layer", layer.number, "with RSTanh" activ = NNl.RSTanh else: print "@ Autoencoding layer", layer.number, "with SoftReLU" activ = NNl.SoftReLU functions = create_functions_ae(layer, data.T, activ, batch_size, rho, mymodel.x, mymodel.x_shape[2:], profmode) train_params['logfile'] = NNl.prepare_log(mymodel, data.description) train_params['error'] = layer train(mymodel, functions, train_params) #*** SGD BACKPROP ***# #____________________# if predict: print '@ Predicting' # predict_label(mymodel, data, train_params) cp.dump(class_probs(mymodel, 
data, train_params), open("class_p", 'wb'), 2) else: print '@ Training with SGD backprop' T_functions = create_functions(mymodel, data, rho, profmode) # Logfile made for analysis of training train_params['logfile'] = NNl.prepare_log(mymodel, data.description) train_params['error'] = mymodel train(mymodel, data, T_functions, train_params) print "\nBest validation: ", mymodel.best_error if profiling: profmode.print_summary() # mymodel.update_model("model_cp") return mymodel
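# Example call (illustrative; the filenames are the defaults documented
# above, and the keyword values are arbitrary):
#
#   mymodel = train_NN(datafile="rawdata", SFile="structure",
#                      n_epochs=200, batch_size=64, cv_k=5)
#
# Passing CPFile="model_cp" resumes from the automatically saved checkpoint
# instead of building a fresh network from the structure file.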
def create_functions_ae(layer, training, activation, batch_size, rho, x,
                        in_dims, profmode):
    """Creates the Theano functions necessary to train as an autoencoder.

    Args:
        layer: The layer (FC) on which training will occur.
        training: The set of training data (no labels, just features).
        activation: The function to use for 'reconstruction' activation.
            This will generally align with the distribution found in the
            input you are trying to reconstruct. For example, for mean-std
            normalized input data, you might use a Tanh function in your 1st
            layer. If that first layer has a ReLU neuronal activation, then
            your 2nd hidden layer would probably use the SoftReLU
            reconstruction activation.
        batch_size: size of the batches used for autoencoder training.
        rho: momentum parameter
        x: Theano variable input to the current layer.
        in_dims: shape of the input piped through 'x'.
        profmode: Flag for profiling

    Returns:
        Dictionary of functions for autoencoder training: 'train_E' (the
        reconstruction cost) plus the 'momentum' and 'update' steps.
    """
    print 'Compiling autoencoder functions...',

    # allocate symbolic variables for the data
    index = T.lscalar('index')  # index to a [mini]batch
    lrate = T.fscalar('lrate')  # learning rate
    aug = T.ivector('aug')  # data augmentation (jitter, flip)

    # Functions to calculate our training and validation error while we run
    functions = {}
    cost = layer.reconstruct_mse(activation)
    f_input = [index, aug]
    functions['train_E'] = theano.function(f_input, cost,
            givens={x: NNl.get_batch(training[0], index, batch_size,
                                     in_dims, aug)},
            mode=profmode)

    # create a list of gradients for all model parameters
    grads = T.grad(cost, layer.pt_params)

    train_input = [index, lrate, aug]

    # Functions to update the model, via momentum-based SGD
    mom_updates = []
    p_updates = []
    for grad_i, param_i in zip(grads, layer.pt_params):
        delta_i = theano.shared(param_i.get_value() * 0.)
        c_update = (delta_i, rho * delta_i - lrate * grad_i)
        mom_updates.append(c_update)
        p_updates.append((param_i, param_i + delta_i))

    functions['momentum'] = theano.function(train_input, cost,
            updates=mom_updates,
            givens={x: NNl.get_batch(training[0], index, batch_size,
                                     in_dims, aug)},
            mode=profmode)
    functions['update'] = theano.function([], updates=p_updates, mode=profmode)

    print 'done'
    return functions
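# `reconstruct_mse` is defined on the FC layer elsewhere in the repo. Given
# pt_params = [W, b, ib], a plausible tied-weights form is the following
# sketch (an assumption for illustration, not the repo's actual code):
#
#   hidden = self.activation(T.dot(x, self.W) + self.b)    # encode
#   recon = activation(T.dot(hidden, self.W.T) + self.ib)  # decode via W.T, ib
#   cost = T.mean(T.sqr(recon - x))                        # mean squared error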
print "\t-A\ttrain autoencoder" for key, value in std_opts.iteritems(): print "\t", key, "\t<", value, ">" sys.exit() if __name__ == '__main__': try: opts, args = getopt.getopt(sys.argv[1:], "hVPpAl:s:n:f:d:S:b:c:C:") except getopt.GetoptError as err: print str(err) usage() pass_in = {} for opt, val in opts: if opt in std_opts: pass_in[std_opts[opt]] = NNl.find_type(val) elif opt == "-V": pass_in['verbose'] = False elif opt == "-P": pass_in['profiling'] = True elif opt == "-p": pass_in['predict'] = True elif opt == "-A": pass_in['train_ae'] = True elif opt in ("-h", "--help"): usage() else: assert False, "unhandled option" print "Passed arguments: ", pass_in train_NN(**pass_in)
# 'okruzenje' (surroundings) stores all the fields around the robot at that
# moment so that the next field the robot will move to can be computed.
# Format: {field ID: field weight}
okruzenje = dict()

cursor.execute("SELECT d_tezina FROM polje WHERE poljeID = "
               "(SELECT poljeID FROM stanje);")
t_tezina = cursor.fetchone()

# Scan loop, 1 - 4, once for each side
for i in range(4):
    # Scan the field ahead with the sensors
    # - manual entry of sensor values
    print "Enter the values for the field ahead"
    SB = input("SB: ")
    UZ = input("UZ: ")

    # - determine the concept from the values returned by the NN
    #   ('zid' = wall, 'prazan prostor' = empty space)
    if NNlib.neuro(SB, UZ) == 1:
        koncept = 'zid'
    else:
        koncept = 'prazan prostor'

    # - load the concept into a session variable
    sql = "SET @koncept:='%s';" % koncept
    cursor.execute(sql)
    conn.commit()

    # Add the values and coordinates to the database
    # - load the current direction to compute the coordinates of the
    #   field ahead
    cursor.execute("SELECT smjer, poljeID FROM stanje;")
    smjer, t_polje = cursor.fetchone()

    # - compute and store the values of the field ahead
    cursor.execute("CALL spremi_polje_ispred(%s)", smjer)
def train(model, data, functions, params):
    """Generic routine to perform training on the GPU using Theano-compiled
    functions and common parameters. This will run through a specified
    number of 'epochs', each consisting of a full pass through the training
    data. The epochs are broken into batches as normal for Stochastic
    Gradient Descent.

    functions: A dictionary containing all of the necessary functions for
        training. It will at least have 'momentum', 'update', and 'train_E'
        functions. 'momentum' updates the delta for each parameter, 'update'
        applies the current delta, and 'train_E' gets the current training
        cost. For supervised training, 'val_E' will usually be included so
        you can keep track of your progress on the validation set.
    params: Necessary training params: LR, training_batches, n_epochs,
        verbose, validation_batches, error (links to where best error is
        tracked).
    """
    LR = params['LR']
    Nb = 0
    for chunk_i in range(len(data.b_samples)):
        Nb += params['t_batches'][chunk_i]

    print "Training {} epochs at LR = {} rho = {}".format(
            params['n_epochs'], LR, params['rho'])
    print "Using schedule:", sorted(params['LRsched'].items())

    # reference augmentation for checking error (centered, no flip)
    T_aug = model.ref_aug

    # Main training loop
    start_time = time.clock()
    for epoch in range(params['n_epochs']):
        ct = 0
        for chunk_i in range(len(data.b_samples)):
            data.T[0].set_value(data.raw[chunk_i])
            data.T[1].set_value(
                    np.asarray(data.labels[chunk_i], dtype=data.ltype))
            for batch_i in range(params['t_batches'][chunk_i]):
                functions['momentum'](batch_i, LR, model.gen_aug())
                functions['update']()
                if params['verb'] and (ct + batch_i + 1) % int(Nb / 5) == 0:
                    print '.',
            ct += params['t_batches'][chunk_i]

        # check the weight distribution
        model.param_status(epoch, output=open("wlog", 'a'))

        # compute error on the training and validation sets
        c_train_error = [functions['train_E'](i, T_aug)
                         for i in xrange(params['t_batches'][-1])]
        if epoch in params['LRsched']:
            LR = params['LRsched'][epoch]
        err_train = np.mean(c_train_error)
        if 'val_E' in functions:
            c_val_error = [functions['val_E'](i, T_aug)
                           for i in xrange(params['v_batches'])]
            err_val = np.mean(c_val_error)
        else:
            err_val = err_train

        # if we achieved a new best validation score,
        # save the model and the best validation score
        if err_val < getattr(params['error'], "best_error"):
            if params['verb']:
                print 'S',
            setattr(params['error'], "best_error", err_val)
            model.save_model()
        else:
            print ' ',

        curr_time = NNl.nice_time(time.clock() - start_time)
        if 'val_E' in functions:
            if params['verb']:
                print("{} | epoch {: >4}, LR={:.4f}, train: {:.5f}, "
                      "val: {:.5f}".format(curr_time, epoch, LR,
                                           err_train, err_val))
            else:
                print '.',
            params['logfile'].write("{} {: >4} {:.6f} {:.8f} {:.8f}\n".format(
                    curr_time, epoch, LR, err_train, err_val))
        else:
            if params['verb']:
                print("{} | epoch {: >4}, LR={:.5f}, train: {:.6f}".format(
                        curr_time, epoch, LR, err_train))
            params['logfile'].write("{} {: >4} {:.6f} {:.8f}\n".format(
                    curr_time, epoch, LR, err_train))
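# The params dict consumed above is assembled by train_NN and
# data.prep_validation; its shape is roughly the following (keys inferred
# from their usage in this function, values illustrative):
#
#   params = {'LR': 0.01, 'rho': 0.9, 'n_epochs': 500, 'verb': True,
#             'LRsched': {20: 0.005, 100: 0.001, 200: 0.0001},
#             't_batches': [...],   # batches per data chunk
#             'v_batches': 10,      # validation batches
#             'error': mymodel,     # object tracking best_error
#             'logfile': open("logfile", 'a')}
#
# Note the schedule semantics: when `epoch in params['LRsched']`, LR is
# replaced by the scheduled value for all subsequent epochs.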