import copy

import numpy as np

# Project-local helpers (DNN, SGD, load_mnist_datasets, softmax_cross_entropy_loss,
# check_acc) are assumed to be importable from the surrounding package.


def main():
    print('=========================================')
    print('                Numpy DNN                ')
    print('               26/Nov/2017               ')
    print('     By Thang Vu ([email protected])     ')
    print('=========================================')

    # load datasets
    path = 'data/mnist.pkl.gz'
    train_set, val_set, test_set = load_mnist_datasets(path)
    X_train, y_train = train_set
    X_val, y_val = val_set
    X_test, y_test = test_set

    # bookkeeping for the best model based on validation accuracy
    best_val_acc = -1
    best_model = None

    # create model and optimization method
    dnn = DNN()
    sgd = SGD(lr=0.1, lr_decay=0.1, weight_decay=1e-3, momentum=0.9)

    # train
    batch_size = 128
    for epoch in range(20):
        dnn.train_mode()  # set model to train mode (because of dropout)

        num_train = X_train.shape[0]
        num_batch = num_train // batch_size
        for batch in range(num_batch):
            # sample a mini-batch
            batch_mask = np.random.choice(num_train, batch_size)
            X_batch = X_train[batch_mask]
            y_batch = y_train[batch_mask]

            # forward
            output = dnn.forward(X_batch)
            loss, dout = softmax_cross_entropy_loss(output, y_batch)
            if batch % 100 == 0:
                print("Epoch %2d Iter %3d Loss %.5f" % (epoch, batch, loss))

            # backward and update
            grads = dnn.backward(dout)
            sgd.step(dnn.params, grads)

        sgd.decay_learning_rate()  # decay learning rate after each epoch

        dnn.eval_mode()  # set model to eval mode
        train_acc = check_acc(dnn, X_train, y_train)
        val_acc = check_acc(dnn, X_val, y_val)

        if best_val_acc < val_acc:
            best_val_acc = val_acc
            # deep-copy so later training does not overwrite the stored best model
            best_model = copy.deepcopy(dnn)

        print('Epoch finished.')
        print('Train acc %.3f' % train_acc)
        print('Val acc %.3f' % val_acc)
        print('-' * 30)
        print('')

    print('Train finished. Best val acc %.3f' % best_val_acc)
    test_acc = check_acc(best_model, X_test, y_test)
    print('Test acc %.3f' % test_acc)
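
# check_acc is called by the training loop above but is not defined in this file.
# The sketch below is a hypothetical stand-in (an assumption, not the project's
# actual helper): it runs the model over the data in batches and returns the
# fraction of correct argmax predictions.
def check_acc_sketch(model, X, y, batch_size=128):
    """Sketch of an accuracy helper, assuming model.forward returns class scores."""
    num_correct = 0
    num_samples = X.shape[0]
    for start in range(0, num_samples, batch_size):
        scores = model.forward(X[start:start + batch_size])   # class scores for the batch
        preds = np.argmax(scores, axis=1)                      # predicted class per sample
        num_correct += np.sum(preds == y[start:start + batch_size])
    return num_correct / num_samples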
class MLSLNN(Serializable):
    """
    Binds a feed-forward neural network on top of an MLSL: the MLSL outputs are
    concatenated with additional per-entry features and fed into the NN, which
    produces the final predictions.

    numpy (as np), Serializable, MLSL, DNN and softmax are assumed to be
    imported/defined elsewhere in the project.
    """

    def __init__(self):
        pass

    def initialize(self, mlsl, nnl, seed=None, weight_range=1.0,
                   outputs_from_mlsl=None, use_softmax=True):
        """
        Initialize an object of this class that binds a new NN on top of an
        existing MLSL object.

        :param mlsl: existing MLSL object whose outputs feed the NN
        :type mlsl: MLSL
        :param nnl: layer sizes of the NN; nnl[0] is the feature size before
                    the MLSL outputs are appended
        :type nnl: list
        :param seed: random seed for the NN weight initialization
        :param weight_range: range of the initial NN weights
        :param outputs_from_mlsl: number of MLSL outputs fed to the NN;
                                  None means use all outputs of the last level
        :param use_softmax: apply a softmax to the NN output
        """
        # Use the requested number of MLSL outputs, or all of them if not given
        # (the original ternary had its branches swapped).
        self.mlsl_output_size = (outputs_from_mlsl if outputs_from_mlsl is not None
                                 else mlsl.output_sizes[-1])

        # Grow the NN input size to the assigned feature size plus the MLSL outputs
        nnl[0] += self.mlsl_output_size
        self.outputs_from_mlsl = outputs_from_mlsl
        self.mlsl = mlsl
        self.nnet = DNN()
        self.nnet.initialize(nnl=nnl, seed=seed, weight_range=weight_range)
        self.use_softmax = use_softmax

    def forward(self, input_to_mlsl, additional_input_to_nn, target=None):
        """
        Run a forward pass through the entire model: the MLSL followed by the NN.

        :param input_to_mlsl: input instance for the MLSL
        :param additional_input_to_nn: extra features appended to the MLSL output
        :param target: unused here; the loss derivative is computed separately
        :return: NN output (after softmax if use_softmax is set)
        """
        mlsl_output = self.mlsl._forward_instance(input_to_mlsl, 0)
        input_to_nn = np.concatenate(
            (mlsl_output[:self.mlsl_output_size], additional_input_to_nn))
        nnet_output = self.nnet.forward(input_to_nn)
        if self.use_softmax:
            nnet_output = softmax(nnet_output)
        return nnet_output

    def get_objective_derivative(self, output, target):
        # Cross-entropy derivative w.r.t. the pre-softmax scores: softmax output minus target.
        if self.use_softmax:
            return output - target
        raise ValueError('objective derivative is only defined when use_softmax is True')

    def backward(self, loss_deriv, instance_node):
        # Run the derivative through the NN first, then through the MLSL/LSTM stack
        nn_deriv = self.nnet.backward_adadelta(loss_deriv)
        deriv = nn_deriv[:self.mlsl_output_size]
        self.mlsl._compute_backward_gradients(instance_node, deriv, 0)
        self.mlsl._compute_LSTM_updates(instance_node, 0)

        # Update the weights of the LSTM modules and refresh momentum_dW and the
        # other adadelta accumulators with the per-level averages of the summed
        # updates. These momentum/adadelta-specific updates happen regardless of
        # whether we use a steady rate, momentum, or adadelta; with a steady rate
        # those variables play no role in the computation of dW.
        for d in range(self.mlsl.max_depth + 1):
            lstm = self.mlsl.lstm_stack[d]
            num_nodes = self.mlsl.number_of_nodes_per_level[d]
            lstm.WLSTM += self.mlsl.sum_of_dWs[d] / num_nodes
            lstm.momentum_dW = self.mlsl.sum_of_dWs[d] / num_nodes
            lstm.tot_gradient_weight = self.mlsl.sum_tot_delta_weight[d] / num_nodes
            lstm.tot_sq_gradient = self.mlsl.sum_tot_sq_gradient[d] / num_nodes
            lstm.tot_delta_weight = self.mlsl.sum_tot_delta_weight[d] / num_nodes
            lstm.tot_sq_delta = self.mlsl.sum_tot_sq_delta[d] / num_nodes

    def run_through_the_model(self, instance_node, target, additional_input_to_nn):
        self.mlsl._reset_learning_parameters()
        output = self.forward(instance_node, additional_input_to_nn)
        loss_deriv = self.get_objective_derivative(output, target)
        return self.backward(loss_deriv, instance_node)
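
# softmax is referenced in MLSLNN.forward but not defined in this file. The
# sketch below is an assumed, numerically stable implementation for a 1-D score
# vector; the project's actual helper may differ.
def softmax_sketch(scores):
    """Hypothetical stand-in for the softmax helper used by MLSLNN.forward."""
    shifted = scores - np.max(scores)   # subtract the max for numerical stability
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores)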