import collections
import sys
import cPickle
from datetime import datetime

import numpy
import theano
import theano.tensor as T
from theano.tensor import tanh
import matplotlib.pyplot as plt

# BaseNetwork, HiddenLayer, SoftmaxLayer and NumpyDataset are project-local
# classes; import them from wherever they are defined in this repository.


class ShallowNetwork(BaseNetwork):
    """
    This network is mostly for debugging and tutorial use, to show you how to
    define your own networks.
    """

    def __init__(self, file_address=None, input_size=None, nof_middle=None, nof_output=None,
                 l1_reg=None, l2_reg=None):
        self.x = T.matrix('x')    # the data is presented as a sequence of floats
        self.y = T.imatrix('y')   # the labels are presented as a matrix of [int] labels (Rejected or Not Rejected)
        if file_address is None:
            if input_size is None or nof_middle is None or nof_output is None or l1_reg is None or l2_reg is None:
                raise Exception("You should set a file_address or all of input_size, nof_middle, "
                                "nof_output, l1_reg and l2_reg")
            self.input_size = input_size
            self.nof_middle = nof_middle
            self.nof_output = nof_output
            self.l1_reg = l1_reg
            self.l2_reg = l2_reg
            self.layer1_w_values = None
            self.layer1_b_values = None
            self.layer2_w_values = None
            self.layer2_b_values = None
        else:
            self.load_network(file_address)
        self.create_network()

    def create_network(self):
        # construct the MLP: a tanh hidden layer followed by a softmax output layer
        self.layer1 = HiddenLayer(n_in=self.input_size, n_out=self.nof_middle, layer_input=self.x,
                                  activation=tanh,
                                  w_values=self.layer1_w_values, b_values=self.layer1_b_values)
        self.layer2 = SoftmaxLayer(n_in=self.nof_middle, n_out=self.nof_output,
                                   layer_input=self.layer1.output,
                                   w_values=self.layer2_w_values, b_values=self.layer2_b_values)
        # the cost we minimize during training is the negative log likelihood of
        # the model plus the regularization terms (L1 and L2); cost is expressed
        # here symbolically
        self.cost = (
            self.layer2.get_cost(self.y)
            + self.l1_reg * (self.layer1.get_l1() + self.layer2.get_l1())
            + self.l2_reg * (self.layer1.get_l2_sqr() + self.layer2.get_l2_sqr())
        )
        self.error = self.layer2.get_error(self.y)
        self.params = self.layer1.get_params() + self.layer2.get_params()

    def save_network(self, file_address):
        network_dict = {"layer1": self.layer1.get_dict(),
                        "layer2": self.layer2.get_dict(),
                        "l1_reg": self.l1_reg,
                        "l2_reg": self.l2_reg}
        with open(file_address, "wb") as my_file:
            cPickle.dump(network_dict, my_file, protocol=2)

    def load_network(self, file_address):
        # the file must be opened for reading ("rb"), not writing
        with open(file_address, "rb") as my_file:
            network_dict = cPickle.load(my_file)
        self.input_size = network_dict["layer1"]["n_in"]
        self.nof_middle = network_dict["layer1"]["n_out"]
        self.nof_output = network_dict["layer2"]["n_out"]
        self.l1_reg = network_dict["l1_reg"]
        self.l2_reg = network_dict["l2_reg"]
        self.layer1_w_values = network_dict["layer1"]["w_values"]
        self.layer1_b_values = network_dict["layer1"]["b_values"]
        self.layer2_w_values = network_dict["layer2"]["w_values"]
        self.layer2_b_values = network_dict["layer2"]["b_values"]

    def train(self, dataset_address):
        # Project settings
        batch_size = 20
        learning_rate = 0.01
        momentum_rate = 0.95
        n_epochs = 1000

        dataset = NumpyDataset(dataset_address)
        train_set_x, train_set_y, valid_set_x, valid_set_y, test_set_x, test_set_y = dataset.get_dataset()
        # compute the number of minibatches for training, validation and testing
        n_train_batches, n_valid_batches, n_test_batches = dataset.get_number_of_batches(batch_size)

        index = T.lscalar()  # index to a minibatch
        test_model = theano.function(
            inputs=[index],
            outputs=self.get_error(),
            givens={
                self.get_input(): test_set_x[index * batch_size:(index + 1) * batch_size],
                self.get_output(): test_set_y[index * batch_size:(index + 1) * batch_size]
            })
        validate_model = theano.function(
            inputs=[index],
            outputs=self.get_cost(),
            givens={
                self.get_input(): valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.get_output(): valid_set_y[index * batch_size:(index + 1) * batch_size]
            })

        delta_params = [theano.shared(value=numpy.zeros(param.get_value(borrow=True).shape), borrow=True)
                        for param in self.get_params()]
        params_g = [T.grad(self.get_cost(), param) for param in self.get_params()]
        # Normal SGD
        # updates = [(param, param - learning_rate * param_g)
        #            for param, param_g in zip(self.get_params(), params_g)]
        # Momentum SGD: first update the velocities, then apply them to the parameters
        updates = [(delta_param, -learning_rate * param_g + momentum_rate * delta_param)
                   for param_g, delta_param in zip(params_g, delta_params)]
        updates += [(param, param + delta_param)
                    for param, delta_param in zip(self.get_params(), delta_params)]
        train_model = theano.function(
            inputs=[index],
            outputs=self.get_cost(),
            updates=updates,
            givens={
                self.get_input(): train_set_x[index * batch_size:(index + 1) * batch_size],
                self.get_output(): train_set_y[index * batch_size:(index + 1) * batch_size]
            })

        ###############
        # TRAIN MODEL #
        ###############
        # early-stopping parameters
        patience = 10000               # look at this many examples regardless
        patience_increase = 2          # wait this much longer when a new best is found
        improvement_threshold = 0.995  # a relative improvement of this much is considered significant
        validation_frequency = min(n_train_batches, patience / 2)

        best_validation_loss = numpy.inf
        best_iter = 0
        test_score = 0.
        epoch = 0
        done_looping = False
        train_error_history = collections.OrderedDict()
        validation_error_history = collections.OrderedDict()

        while (epoch < n_epochs) and (not done_looping):
            epoch += 1
            for minibatch_index in xrange(n_train_batches):
                # iteration number
                iteration_number = (epoch - 1) * n_train_batches + minibatch_index
                # save the training cost of this iteration
                minibatch_avg_cost = train_model(minibatch_index)
                train_error_history[iteration_number] = minibatch_avg_cost
                # validate every validation_frequency iterations
                if (iteration_number + 1) % validation_frequency == 0:
                    # compute the cost on the validation set
                    validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)
                    print('\r epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)),
                    sys.stdout.flush()
                    # save the validation history
                    validation_error_history[iteration_number] = this_validation_loss
                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # increase patience if the improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iteration_number * patience_increase)
                        best_validation_loss = this_validation_loss
                        best_iter = iteration_number
                        # test it on the test set
                        test_losses = [test_model(i) for i in xrange(n_test_batches)]
                        test_score = numpy.mean(test_losses)
                if patience <= iteration_number:
                    done_looping = True
                    break

        # plot the training (red) and validation (blue) error curves
        plt.plot(train_error_history.keys(), train_error_history.values(), 'r',
                 validation_error_history.keys(), validation_error_history.values(), 'b')
        print("\nTraining is Done")
        sys.stdout.flush()
        plt.savefig("assets/outputs/image-%s-%2.4f.jpg" %
                    (str(datetime.now().replace(microsecond=0)), test_score * 100.))
        print(('Optimization complete. Best validation score of %f %% '
               'obtained at iteration %i, with test performance %f %%') %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))

    def get_error(self):
        return self.error

    def get_cost(self):
        return self.cost

    def get_input(self):
        return self.x

    def get_output(self):
        return self.y

    def get_params(self):
        return self.params
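

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only). The layer sizes, regularization
# strengths and file paths below are placeholders, not values from the
# original project; adapt them to your own dataset.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # build a fresh network: e.g. 30 input features, 50 hidden units,
    # 2 output classes (Rejected / Not Rejected)
    network = ShallowNetwork(input_size=30, nof_middle=50, nof_output=2,
                             l1_reg=0.0, l2_reg=0.0001)

    # train on a dataset file (placeholder path) and persist the learned weights
    network.train("assets/datasets/my_dataset.pkl")
    network.save_network("assets/networks/shallow_network.pkl")

    # later, restore the trained network from disk; __init__ rebuilds the
    # symbolic graph from the saved layer parameters via create_network()
    restored_network = ShallowNetwork(file_address="assets/networks/shallow_network.pkl")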