def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
             batch_size=128, n_hidden=500, n_hiddenLayers=3,
             verbose=False, smaller_set=False):
    """
    Build an MLP classifier, compile its Theano functions and train it.

    The model is a ``myMLP`` with ``32*32*3`` inputs and 10 outputs. Its
    parameters are fitted by minibatch stochastic gradient descent on the
    negative log likelihood plus L1 and L2 penalties. The training loop
    itself is delegated to ``train_nn``.

    :type learning_rate: float
    :param learning_rate: step size applied to each parameter gradient.

    :type L1_reg: float
    :param L1_reg: weight of ``classifier.L1`` in the cost.

    :type L2_reg: float
    :param L2_reg: weight of ``classifier.L2_sqr`` in the cost.

    :type n_epochs: int
    :param n_epochs: maximal number of epochs, forwarded to ``train_nn``.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it gives the number
                     of units in each hidden layer and its length should
                     equal ``n_hiddenLayers``.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: forwarded to ``train_nn`` (epoch summary printing).

    :type smaller_set: boolean
    :param smaller_set: when true the data is loaded with ``ds_rate=5``.
    """
    # Fetch the three data splits, down-sampled on request.
    datasets = load_data(ds_rate=5) if smaller_set else load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    def count_batches(shared_x):
        # Whole minibatches only; a trailing partial batch is dropped.
        return shared_x.get_value(borrow=True).shape[0] // batch_size

    n_train_batches = count_batches(train_set_x)
    n_valid_batches = count_batches(valid_set_x)
    n_test_batches = count_batches(test_set_x)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Symbolic placeholders: minibatch index, input matrix, label vector.
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    rng = np.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # Symbolic training objective: data term plus both regularizers.
    nll = classifier.negative_log_likelihood(y)
    cost = nll + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    def minibatch(shared_x, shared_y):
        # Substitute x and y with the index-th slice of the given split.
        return {
            x: shared_x[index * batch_size:(index + 1) * batch_size],
            y: shared_y[index * batch_size:(index + 1) * batch_size]
        }

    # Functions reporting the classification errors on one minibatch.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=minibatch(test_set_x, test_set_y)
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=minibatch(valid_set_x, valid_set_y)
    )

    # Plain SGD: one (variable, new value) pair per model parameter.
    gparams = []
    for param in classifier.params:
        gparams.append(T.grad(cost, param))

    updates = []
    for param, gparam in zip(classifier.params, gparams):
        updates.append((param, param - learning_rate * gparam))

    # Returns the cost of the minibatch and applies `updates` as a side
    # effect of every call.
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens=minibatch(train_set_x, train_set_y)
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)
def test_adversarial_example(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                             n_epochs=100, batch_size=128, n_hidden=500,
                             n_hiddenLayers=3, verbose=False,
                             smaller_set=False):
    """
    Wrapper function for the input-perturbation (adversarial) experiment.

    An MLP is built on ``x + adv_error`` and trained with an all-zero
    ``adv_error``. The gradient of the cost with respect to the network
    input is then evaluated on test example 0, and the network is queried
    again on that example perturbed by ``-1e-2 * sign(gradient)``.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost.

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost.

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
                     number of units in each hidden layer, and its length
                     should equal n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type smaller_set: boolean
    :param smaller_set: when true the data is loaded with ``ds_rate=5``,
                        the same setting ``test_mlp`` uses.

    :return: tuple ``(p_y_given_x, adv_p_y_given_x)``: the value of
             ``classifier.p_y_given_x`` for test example 0 without and
             with the perturbation.
    """
    # Load the raw (non-shared) splits, honouring `smaller_set`.
    if smaller_set:
        train_set, valid_set, test_set = load_data(ds_rate=5,
                                                   theano_shared=False)
    else:
        train_set, valid_set, test_set = load_data(theano_shared=False)

    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()     # index to a [mini]batch
    adv_error = T.vector()  # perturbation added to every input row
    x = T.matrix('x')       # the data is presented as rasterized images
    y = T.ivector('y')      # the labels are presented as 1D vector of
                            # [int] labels

    rng = np.random.RandomState(1234)

    # The network sees the perturbed input; adv_error (a vector) is
    # broadcast across the rows of x.
    classifier = myMLP(
        rng=rng,
        input=x + adv_error,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood
    # of the model plus the regularization terms (L1 and L2); cost is
    # expressed here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling Theano functions that compute the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index, adv_error],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index, adv_error],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # gradient of the cost with respect to every model parameter
    gparams = [T.grad(cost, param) for param in classifier.params]

    # gradient of the cost with respect to the network input
    ginparam = T.grad(cost, classifier.input)

    # plain SGD: a list of (variable, update expression) pairs
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # `train_model` returns the cost and, as a side effect, applies the
    # rules defined in `updates`
    train_model = theano.function(
        inputs=[index, adv_error],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # Input gradient and class probabilities for the single test example
    # at position `index`; the sign is taken afterwards in NumPy.
    test_adv_model = theano.function(
        inputs=[index, adv_error],
        outputs=[ginparam, classifier.p_y_given_x],
        givens={
            x: test_set_x[index:index+1],
            y: test_set_y[index:index+1]
        }
    )

    # Class probabilities for the same single example under a given
    # perturbation (labels are not needed here).
    test_adv_example = theano.function(
        inputs=[index, adv_error],
        outputs=classifier.p_y_given_x,
        givens={
            x: test_set_x[index:index+1]
        }
    )

    # Shape of one example row; borrow=True avoids copying the whole
    # training set just to read a shape.
    noise_shape = train_set_x.get_value(borrow=True)[0].shape

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # The zero perturbation is passed as the last positional argument.
    # NOTE(review): presumably train_nn forwards it as `adv_error` to the
    # compiled functions -- confirm against train_nn.
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose,
             np.zeros(shape=noise_shape, dtype=np.float32))

    adv_grad, p_y_given_x = test_adv_model(
        0, np.zeros(shape=noise_shape, dtype=np.float32))

    # NOTE(review): the step is -1e-2 * sign(d cost / d input), which moves
    # the input towards a LOWER cost; the fast-gradient-sign attack uses
    # the opposite sign -- confirm which direction is intended.
    adv_p_y_given_x = test_adv_example(
        0, -1e-2*np.sign(adv_grad).flatten())

    return p_y_given_x, adv_p_y_given_x
def test_CDNN(learning_rate=0.1, n_epochs=1000, nkerns=(16, 512),
              batch_size=200, verbose=False):
    """
    Wrapper function for testing a CNN in cascade with a DNN.

    Two convolution/pooling layers feed three fully-connected tanh layers
    and a logistic-regression output layer with 10 classes. The model is
    trained by minibatch SGD on the negative log likelihood through
    ``train_nn``.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: sequence of two ints
    :param nkerns: number of kernels on each convolutional layer. The
                   default is a tuple so it cannot be mutated across calls;
                   a list is equally accepted.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """
    # `np` is the NumPy alias used by the other wrappers in this file.
    rng = np.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix("x")    # the data is presented as rasterized images
    y = T.ivector("y")   # the labels are presented as 1D vector of
                         # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # First convolutional pooling layer:
    # 5x5 filtering reduces the image size to (32-5+1, 32-5+1) = (28, 28);
    # 3x3 pooling is taken to give (9, 9), the size declared as layer1's
    # image_shape below (assumes the pooling drops the incomplete border
    # -- TODO confirm in LeNetConvPoolLayer).
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(3, 3)
    )

    # Second convolutional pooling layer:
    # 7x7 filtering reduces the image size to (9-7+1, 9-7+1) = (3, 3);
    # 2x2 pooling is taken to give (1, 1), matching the n_in of layer2.
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 9, 9),
        filter_shape=(nkerns[1], nkerns[0], 7, 7),
        poolsize=(2, 2),
    )

    # The fully-connected layers operate on 2D matrices, so the 4D output
    # is flattened to (batch_size, nkerns[1] * 1 * 1), i.e. (200, 512)
    # with the default values.
    layer2_input = layer1.output.flatten(2)

    hidden_neurons = [500, 200, 100]

    # three fully-connected tanh layers
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 1 * 1,
                         n_out=hidden_neurons[0], activation=T.tanh)

    layer3 = HiddenLayer(rng, input=layer2.output, n_in=hidden_neurons[0],
                         n_out=hidden_neurons[1], activation=T.tanh)

    layer4 = HiddenLayer(rng, input=layer3.output, n_in=hidden_neurons[1],
                         n_out=hidden_neurons[2], activation=T.tanh)

    # classify the values of the last fully-connected layer
    layer5 = LogisticRegression(input=layer4.output,
                                n_in=hidden_neurons[2], n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer5.negative_log_likelihood(y)

    # create functions to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer5.errors(y),
        givens={
            x: test_set_x[index * batch_size : (index + 1) * batch_size],
            y: test_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    validate_model = theano.function(
        [index],
        layer5.errors(y),
        givens={
            x: valid_set_x[index * batch_size : (index + 1) * batch_size],
            y: valid_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (layer5.params + layer4.params + layer3.params
              + layer2.params + layer1.params + layer0.params)

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # SGD update rule, built by looping over all (params[i], grads[i])
    # pairs instead of writing one rule per parameter by hand.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size : (index + 1) * batch_size],
            y: train_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")

    train_nn(
        train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches,
        n_epochs, verbose
    )