Example #1
import copy

import numpy as np

# Project-level helpers assumed to come from this repo's own modules:
# load_mnist_datasets, DNN, SGD, softmax_cross_entropy_loss, check_acc


def main():
    print('=========================================')
    print('               Numpy DNN                 ')
    print('              26/Nov/2017                ')
    print('    By Thang Vu ([email protected])    ')
    print('=========================================')

    # load datasets
    path = 'data/mnist.pkl.gz'
    train_set, val_set, test_set = load_mnist_datasets(path)
    batch_size = 128
    X_train, y_train = train_set
    X_val, y_val = val_set
    X_test, y_test = test_set

    # bookkeeping for the best model based on validation accuracy
    best_val_acc = -1
    best_model = None

    # create model and optimization method
    dnn = DNN()
    sgd = SGD(lr=0.1, lr_decay=0.1, weight_decay=1e-3, momentum=0.9)
    
    # Train (batch_size is already set above)
    for epoch in range(20):
        dnn.train_mode() # set model to train mode (because of dropout)
        
        num_train = X_train.shape[0]
        num_batch = num_train//batch_size
        for batch in range(num_batch):
            # get batch data
            batch_mask = np.random.choice(num_train, batch_size)
            X_batch = X_train[batch_mask]
            y_batch = y_train[batch_mask]
           
            # forward
            output = dnn.forward(X_batch)
            loss, dout = softmax_cross_entropy_loss(output, y_batch)
            if batch%100 == 0:
                print("Epoch %2d Iter %3d Loss %.5f" %(epoch, batch, loss))

            # backward and update
            grads = dnn.backward(dout)
            sgd.step(dnn.params, grads)
                                
        sgd.decay_learning_rate() # decay learning rate after one epoch
        dnn.eval_mode() # set model to eval mode 
        train_acc = check_acc(dnn, X_train, y_train)
        val_acc = check_acc(dnn, X_val, y_val)

        # keep a deep copy of the best model so far, based on validation accuracy
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model = copy.deepcopy(dnn)

        print('Epoch finished.')
        print('Train acc %.3f' %train_acc)
        print('Val acc %.3f' %val_acc)
        print('-'*30)
        print('')

    print('Training finished. Best val acc %.3f' % best_val_acc)
    test_acc = check_acc(best_model, X_test, y_test)
    print('Test acc %.3f' % test_acc)


if __name__ == '__main__':
    main()
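For reference, the training loop above relies on a check_acc helper to measure accuracy. A minimal sketch of what such a helper might look like is shown below; it is hypothetical, the repo's own implementation may differ (for example in how it batches the data), and it only assumes that model.forward returns per-class scores:

def check_acc(model, X, y, batch_size=128):
    # Hypothetical helper: run the model over X in mini-batches and return the
    # fraction of correctly classified examples.
    num = X.shape[0]
    correct = 0
    for start in range(0, num, batch_size):
        scores = model.forward(X[start:start + batch_size])
        preds = np.argmax(scores, axis=1)
        correct += np.sum(preds == y[start:start + batch_size])
    return correct / num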
Example #2
class MLSLNN(Serializable):
    """
    This class initializes a neural network
    based on the size of features per entry along
    with a provided MLSL which generates certain number of outputs
    """
    def __init__(self):
        pass

    def initialize(self,
                   mlsl,
                   nnl,
                   seed=None,
                   weight_range=1.0,
                   outputs_from_mlsl=None,
                   use_softmax=True):
        """
        Initialize an object of this class that binds a new NN on top
        of an existing MLSL object
        :param mlsl:
        :type mlsl: MLSL
        :param nnl:
        :type nnl: list
        :param seed:
        :type seed:
        :param weight_range:
        :type weight_range:
        :return:
        :rtype:
        """
        # Number of MLSL outputs fed into the NN: the caller-provided count,
        # falling back to the size of the MLSL's last output layer.
        self.mlsl_output_size = (outputs_from_mlsl if outputs_from_mlsl
                                 else mlsl.output_sizes[-1])

        # Change input size of Neural net to assigned feature size plus MLSL outputs
        nnl[0] += self.mlsl_output_size

        self.outputs_from_mlsl = outputs_from_mlsl

        self.mlsl = mlsl
        self.nnet = DNN()
        self.nnet.initialize(nnl=nnl, seed=seed, weight_range=weight_range)
        self.use_softmax = use_softmax

    def forward(self, input_to_mlsl, additional_input_to_nn, target):
        """
        Runs a forward pass through the entire model: the MLSL first, then the
        NN on the concatenation of the MLSL outputs and the additional input
        :param input_to_mlsl: input instance passed to the MLSL
        :param additional_input_to_nn: extra features appended to the MLSL outputs
        :param target: target vector (accepted for interface symmetry; unused here)
        :return: the NN output, softmaxed when use_softmax is set
        """
        mlsl_output = self.mlsl._forward_instance(input_to_mlsl, 0)
        input_to_nn = np.concatenate(
            (mlsl_output[:self.mlsl_output_size], additional_input_to_nn))
        nnet_output = self.nnet.forward(input_to_nn)
        if self.use_softmax:
            nnet_output = softmax(nnet_output)

        return nnet_output

    def get_objective_derivative(self, output, target):
        # With softmax outputs and a cross-entropy objective, the derivative of
        # the loss with respect to the pre-softmax activations is output - target.
        if self.use_softmax:
            return output - target
        else:
            raise ValueError('only the softmax cross-entropy objective is supported')

    def backward(self, loss_deriv, instance_node):

        # Backpropagate through the NN first, then through the MLSL/LSTM stack
        nn_deriv = self.nnet.backward_adadelta(loss_deriv)

        # Gradient w.r.t. the slice of the NN input that came from the MLSL
        deriv = nn_deriv[:self.mlsl_output_size]

        self.mlsl._compute_backward_gradients(instance_node, deriv, 0)
        self.mlsl._compute_LSTM_updates(instance_node, 0)
        # Update the weights of the LSTM modules and refresh momentum_dW and the
        # other momentum/adadelta bookkeeping variables with the per-level
        # averages of the accumulated sums. These updates happen regardless of
        # whether we use a steady rate, momentum, or adadelta; with a steady
        # rate those variables simply play no role in the computation of dW.
        for d in range(self.mlsl.max_depth + 1):
            num_nodes = self.mlsl.number_of_nodes_per_level[d]
            lstm = self.mlsl.lstm_stack[d]
            lstm.WLSTM += self.mlsl.sum_of_dWs[d] / num_nodes
            lstm.momentum_dW = self.mlsl.sum_of_dWs[d] / num_nodes
            lstm.tot_gradient_weight = self.mlsl.sum_tot_delta_weight[d] / num_nodes
            lstm.tot_sq_gradient = self.mlsl.sum_tot_sq_gradient[d] / num_nodes
            lstm.tot_delta_weight = self.mlsl.sum_tot_delta_weight[d] / num_nodes
            lstm.tot_sq_delta = self.mlsl.sum_tot_sq_delta[d] / num_nodes

    def run_through_the_model(self, instance_node, target,
                              additional_input_to_nn):
        self.mlsl._reset_learning_parameters()
        output = self.forward(instance_node, additional_input_to_nn, target)
        loss_deriv = self.get_objective_derivative(output, target)
        return self.backward(loss_deriv, instance_node)
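When use_softmax is set, get_objective_derivative returns output - target, i.e. the derivative of the cross-entropy loss with respect to the pre-softmax activations. The snippet below is a small, self-contained finite-difference check of that identity; it is illustrative only, and the _softmax/_cross_entropy helpers are local to this sketch rather than part of the repo:

import numpy as np

def _softmax(z):
    e = np.exp(z - np.max(z))
    return e / np.sum(e)

def _cross_entropy(z, t):
    # cross-entropy of a one-hot target t against softmax(z)
    return -np.sum(t * np.log(_softmax(z)))

z = np.array([0.5, -1.0, 2.0])    # pre-softmax activations
t = np.array([0.0, 0.0, 1.0])     # one-hot target
analytic = _softmax(z) - t        # what get_objective_derivative computes
eps = 1e-6
numeric = np.array([
    (_cross_entropy(z + eps * np.eye(3)[i], t) -
     _cross_entropy(z - eps * np.eye(3)[i], t)) / (2 * eps)
    for i in range(3)
])
assert np.allclose(analytic, numeric, atol=1e-5)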