def test_convnet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512, 20],
                 batch_size=200, verbose=False, filter_size=2):
    """
    Wrapper function for testing Multi-Stage ConvNet on SVHN dataset

    Three LeNetConvPoolLayer stages are followed by a tanh HiddenLayer and
    a LogisticRegression classifier.  The third stage receives the
    channel-wise concatenation of the second stage's output and a
    down-sampled copy of the first stage's output.

    NOTE(review): another definition of ``test_convnet`` appears later in
    this file and would replace this one at import time -- confirm which
    one is intended.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer (three entries are
                   read; the list is never mutated)

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type filter_size: int
    :param filter_size: side length of the square filters used by all three
                        convolutional stages

    :return: whatever ``train_nn`` returns
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # First convolutional pooling layer.
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, filter_size, filter_size),
        poolsize=(2, 2)
    )

    # Second convolutional pooling layer.  The shape arithmetic used
    # throughout this function treats every stage as shrinking a side of
    # length n to (n - filter_size + 1) // 2.
    new_shape = (32 - filter_size + 1) // 2
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], new_shape, new_shape),
        filter_shape=(nkerns[1], nkerns[0], filter_size, filter_size),
        poolsize=(2, 2)
    )

    # Combine Layer 0 output and Layer 1 output.
    # Layer 1's maps have side (new_shape - filter_size + 1) // 2, so the
    # layer 0 maps are first cropped to (new_shape - filter_size + 1) and
    # then 2x2 max-pooled; this makes both operands of the concatenation
    # agree for every filter_size.  For filter_size 1 or 2 the crop removes
    # at most the trailing row/column that ignore_border=True would have
    # discarded anyway, so the result is unchanged for the default setting.
    conv1_shape = new_shape - filter_size + 1
    layer0_output_ds = downsample.max_pool_2d(
        input=layer0.output[:, :, :conv1_shape, :conv1_shape],
        ds=(2, 2),
        ignore_border=True
    )

    # concatenate along the feature-map axis
    layer2_input = T.concatenate([layer1.output, layer0_output_ds], axis=1)

    # Third convolutional pooling layer.
    new_shape = (new_shape - filter_size + 1) // 2
    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer2_input,
        image_shape=(batch_size, nkerns[0] + nkerns[1], new_shape, new_shape),
        filter_shape=(nkerns[2], nkerns[0] + nkerns[1],
                      filter_size, filter_size),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape
    # (batch_size, nkerns[2] * new_shape * new_shape), with new_shape as
    # recomputed just below.
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected tanh layer
    new_shape = (new_shape - filter_size + 1) // 2
    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=nkerns[2] * new_shape * new_shape,
        n_out=500,
        activation=T.tanh
    )

    # classify the values of the fully-connected layer
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # list of all model parameters to be fit by gradient descent
    params = (layer4.params + layer3.params + layer2.params +
              layer1.params + layer0.params)

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    return train_nn(train_model, validate_model, test_model,
                    n_train_batches, n_valid_batches, n_test_batches,
                    n_epochs, verbose)
def test_CDNN(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],
              batch_size=200, verbose=False, filter_size=5):
    """
    Wrapper function for testing CNN in cascade with DNN

    Two LeNetConvPoolLayer stages feed two tanh HiddenLayers of 500 units
    each, followed by a 10-way LogisticRegression classifier.  The model is
    trained with plain SGD through ``train_nn``.

    :type learning_rate: float
    :param learning_rate: factor applied to every gradient in the SGD update

    :type n_epochs: int
    :param n_epochs: forwarded to ``train_nn``

    :type nkerns: list of ints
    :param nkerns: number of kernels of the two convolutional stages

    :type batch_size: int
    :param batch_size: number of examples in a minibatch

    :type verbose: boolean
    :param verbose: forwarded to ``train_nn``

    :type filter_size: int
    :param filter_size: side length of the square filters of both stages

    :return: whatever ``train_nn`` returns
    """

    rng = numpy.random.RandomState(23455)

    data = load_data()
    train_set_x, train_set_y = data[0]
    valid_set_x, valid_set_y = data[1]
    test_set_x, test_set_y = data[2]

    # number of whole minibatches available in each split
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # symbolic inputs
    index = T.lscalar()  # minibatch number
    x = T.matrix('x')    # rasterized images, one per row
    y = T.ivector('y')   # integer labels

    def minibatch(shared_var):
        # symbolic slice selecting minibatch number `index`
        return shared_var[index * batch_size:(index + 1) * batch_size]

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # (batch_size, 3 * 32 * 32) rows -> 4D tensor for the conv layers
    images = x.reshape((batch_size, 3, 32, 32))
    square_filter = (filter_size, filter_size)

    # first convolution + pooling stage
    layer0 = LeNetConvPoolLayer(
        rng,
        input=images,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3) + square_filter,
        poolsize=(2, 2)
    )

    # second convolution + pooling stage
    side_after_layer0 = (32 - filter_size + 1) // 2
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0],
                     side_after_layer0, side_after_layer0),
        filter_shape=(nkerns[1], nkerns[0]) + square_filter,
        poolsize=(2, 2)
    )

    # the fully-connected part works on one flattened row per example
    side_after_layer1 = (side_after_layer0 - filter_size + 1) // 2
    layer2 = HiddenLayer(
        rng,
        input=layer1.output.flatten(2),
        n_in=nkerns[1] * side_after_layer1 * side_after_layer1,
        n_out=500,
        activation=T.tanh
    )

    layer3 = HiddenLayer(
        rng,
        input=layer2.output,
        n_in=500,
        n_out=500,
        activation=T.tanh
    )

    # 10-way classifier on top of the last hidden layer
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10)

    # training objective: negative log likelihood of the model
    cost = layer4.negative_log_likelihood(y)

    # error-rate functions on the test and validation splits
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={x: minibatch(test_set_x), y: minibatch(test_set_y)}
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={x: minibatch(valid_set_x), y: minibatch(valid_set_y)}
    )

    # every trainable parameter, classifier first
    params = (layer4.params + layer3.params + layer2.params +
              layer1.params + layer0.params)
    grads = T.grad(cost, params)

    # one plain SGD rule per (parameter, gradient) pair
    updates = [(p, p - learning_rate * g) for p, g in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: minibatch(train_set_x), y: minibatch(train_set_y)}
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    return train_nn(train_model, validate_model, test_model,
                    n_train_batches, n_valid_batches, n_test_batches,
                    n_epochs, verbose)
def test_convnet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512, 20],filter_shape=[9,5],
                 batch_size=200, verbose=True):
    """
    Wrapper function for testing Multi-Stage ConvNet on SVHN dataset

    The third convolutional stage receives the channel-wise concatenation
    of the second stage's output and a down-sampled copy of the first
    stage's output; its filter covers the whole input map, so it emits
    one value per kernel.

    NOTE(review): an earlier definition of ``test_convnet`` appears in this
    file and would be replaced by this one at import time -- confirm which
    one is intended.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type filter_shape: list of ints
    :param filter_shape: side length of the square filters of the first
                         and of the second convolutional stage

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Spatial sizes are computed with floor division (//) so they stay
    # integers regardless of the interpreter's `/` semantics; a float here
    # would make image_shape / filter_shape invalid.
    # (32 - 9 + 1) // 2 = 12 with the default filter_shape
    layer1_size = (33 - filter_shape[0]) // 2
    # (12 - 5 + 1) // 2 = 4 with the default filter_shape
    layer2_size = (layer1_size - filter_shape[1] + 1) // 2

    # First convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        # (batch size, num input feature maps, image height, image width)
        image_shape=(batch_size, 3, 32, 32),
        # (number of filters, num input feature maps, filter height,
        #  filter width)
        filter_shape=(nkerns[0], 3, filter_shape[0], filter_shape[0]),
        poolsize=(2, 2)
    )

    # Second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], layer1_size, layer1_size),
        filter_shape=(nkerns[1], nkerns[0], filter_shape[1], filter_shape[1]),
        poolsize=(2, 2)
    )

    # Combine Layer 0 output and Layer 1 output: downsample the first layer
    # output to match the size of the second layer output.
    # NOTE(review): ds=(3, 3) is hard-coded; it turns the 12 x 12 maps of
    # the default configuration into 4 x 4.  Other filter_shape values may
    # produce sizes that no longer match layer1.output -- confirm before
    # changing filter_shape.
    layer0_output_ds = downsample.max_pool_2d(
        input=layer0.output,
        ds=(3, 3),
        ignore_border=False
    )

    # concatenate along the feature-map axis
    layer2_input = T.concatenate([layer1.output, layer0_output_ds], axis=1)

    # Third convolutional pooling layer.  The filter is as large as the
    # input map and no pooling is applied (poolsize (1, 1)).
    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer2_input,
        image_shape=(batch_size, nkerns[1] + nkerns[0],
                     layer2_size, layer2_size),
        filter_shape=(nkerns[2], nkerns[1] + nkerns[0],
                      layer2_size, layer2_size),
        poolsize=(1, 1)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 1 * 1).
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=nkerns[2] * 1 * 1,
        n_out=10,
        activation=T.nnet.sigmoid
    )

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=10, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # list of all model parameters to be fit by gradient descent
    params = (layer4.params + layer3.params + layer2.params +
              layer1.params + layer0.params)

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)
def test_adversarial_example(learning_rate=0.03, L1_reg=0.0001, L2_reg=0.0001,
                             n_epochs=1, batch_size=128, n_hidden=400,
                             n_hiddenLayers=12, verbose=False,
                             noise_mean=0.0, noise_var=1.0):
    """
    Wrapper function for testing adversarial examples

    Trains a ``myMLP`` with SGD whose parameter updates are perturbed by
    Gaussian noise, then perturbs test minibatch 1 along the sign of the
    cost gradient with respect to the input and compares the predictions
    on the original and on the perturbed minibatch.

    NOTE(review): a later definition of ``test_adversarial_example``
    appears in this file and would replace this one at import time --
    confirm which one is intended.

    :type learning_rate: float
    :param learning_rate: factor for the stochastic gradient

    :type L1_reg: float
    :param L1_reg: weight of ``classifier.L1`` in the cost

    :type L2_reg: float
    :param L2_reg: weight of ``classifier.L2_sqr`` in the cost

    :type n_epochs: int
    :param n_epochs: forwarded to ``train_nn``

    :type batch_size: int
    :param batch_size: number of examples in a minibatch

    :type n_hidden: int
    :param n_hidden: forwarded to ``myMLP``

    :type n_hiddenLayers: int
    :param n_hiddenLayers: forwarded to ``myMLP``

    :type verbose: boolean
    :param verbose: forwarded to ``train_nn``

    :type noise_mean: float
    :param noise_mean: mean of the noise added to every parameter update

    :type noise_var: float
    :param noise_var: passed as ``std`` of the noise added to every
                      parameter update.  NOTE(review): the name suggests a
                      variance but the value is used as a standard
                      deviation -- confirm the intent.

    :return: tuple ``(x_adv, op, ap, ad_example, ys, indx)``: the original
             test minibatch, ``[y_pred, p_y_given_x]`` on it,
             ``[y_pred, p_y_given_x]`` on the perturbed minibatch, the
             perturbed minibatch, the true labels, and the position of the
             fourth correctly classified original example
    :raises IndexError: if fewer than four examples of the minibatch are
                        classified correctly
    """

    # Load the smaller (down-sampled) dataset.  Each split is an
    # (input, target) pair; the inputs expose get_value(), which is used
    # below to count the examples.
    datasets = load_data(ds_rate=5)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    rng = numpy.random.RandomState(1234)
    srng = RandomStreams(seed=234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # Predictions on the *test* set: they are compared below with the test
    # labels (get_y) and with the predictions on adversarial examples that
    # are derived from the same test minibatch (get_x).
    get_preds = theano.function(
        inputs=[index],
        outputs=[classifier.y_pred, classifier.p_y_given_x],
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # SGD update with Gaussian noise injected into every parameter update
    updates = [
        (param,
         param - learning_rate * gparam
         + srng.normal(size=gparam.shape, avg=noise_mean, std=noise_var))
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # This function takes the gradient with respect to the input
    gparamx = T.grad(cost, classifier.input)
    calc_gradx = theano.function(
        [index],
        gparamx,
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # Intermediate step to get the original data
    get_x = theano.function(
        [index],
        test_set_x[index * batch_size: (index + 1) * batch_size]
    )
    get_y = theano.function(
        [index],
        test_set_y[index * batch_size: (index + 1) * batch_size]
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)

    # Get the gradient for a batch of inputs.  calc_gradx has a single
    # (non-list) output, so it returns the whole gradient matrix for the
    # minibatch; every example is perturbed along the sign of its own
    # gradient row.
    x_adv = get_x(1)
    gx_adv = numpy.sign(calc_gradx(1))
    ad_example = x_adv + gx_adv * numpy.random.random(gx_adv.shape)*0.0000000001

    shared_adv_x = theano.shared(
        numpy.asarray(ad_example, dtype=theano.config.floatX),
        borrow=True
    )

    # `index * 0` keeps `index` part of the graph while always selecting
    # the whole adversarial minibatch.
    get_predsadv = theano.function(
        inputs=[index],
        outputs=[classifier.y_pred, classifier.p_y_given_x],
        givens={
            x: shared_adv_x[(index*0):]
        }
    )

    ap = get_predsadv(1)
    op = get_preds(1)
    ys = get_y(1)

    # positions of the examples the model classified correctly before the
    # perturbation (uses the actual minibatch length, not a fixed 128)
    indexes = [i for i, (label, pred) in enumerate(zip(ys, op[0]))
               if label == pred]

    # Select the correctly classified example at position 3 of that list,
    # i.e. the fourth one.
    if len(indexes) <= 3:
        raise IndexError('fewer than four correctly classified examples '
                         'in the selected test minibatch')
    indx = indexes[3]

    return x_adv, op, ap, ad_example, ys, indx
def test_adversarial_example(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                             n_epochs=100, batch_size=128, n_hidden=500,
                             n_hiddenLayers=3, verbose=False,
                             smaller_set=False):
    """
    Wrapper function for testing adversarial examples

    Trains a ``myMLP`` classifier, then walks over the test minibatches.
    For every minibatch that the model predicts without a single error, the
    inputs are shifted by 0.1 times the sign of the cost gradient with
    respect to the input and classified again.  The walk stops at the first
    shifted minibatch on which predictions and labels differ.

    NOTE(review): an earlier definition of ``test_adversarial_example``
    appears in this file and would be replaced by this one at import time
    -- confirm which one is intended.

    :type learning_rate: float
    :param learning_rate: factor for the stochastic gradient

    :type L1_reg: float
    :param L1_reg: weight of ``classifier.L1`` in the cost

    :type L2_reg: float
    :param L2_reg: weight of ``classifier.L2_sqr`` in the cost

    :type n_epochs: int
    :param n_epochs: forwarded to ``train_nn``

    :type batch_size: int
    :param batch_size: number of examples in a minibatch

    :type n_hidden: int
    :param n_hidden: forwarded to ``myMLP``

    :type n_hiddenLayers: int
    :param n_hiddenLayers: forwarded to ``myMLP``

    :type verbose: boolean
    :param verbose: forwarded to ``train_nn``

    :type smaller_set: boolean
    :param smaller_set: when true, ``load_data`` is called with
                        ``ds_rate=5``

    :return: tuple ``(test_output, expected_distribution)``; ``test_output``
             is ``[labels, predictions, p_y_given_x]`` for the last shifted
             minibatch that was evaluated and ``expected_distribution`` is
             ``p_y_given_x`` for the matching unshifted minibatch.  Both
             are ``None`` when no minibatch was predicted without error.
    """

    # load the dataset, optionally the down-sampled variant
    datasets = load_data(ds_rate=5) if smaller_set else load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # number of whole minibatches available in each split
    n_train_batches, n_valid_batches, n_test_batches = [
        split.get_value(borrow=True).shape[0] // batch_size
        for split in (train_set_x, valid_set_x, test_set_x)
    ]

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # symbolic inputs
    index = T.lscalar()  # minibatch number
    x = T.matrix('x')    # rasterized images, one per row
    y = T.ivector('y')   # integer labels

    def minibatch(shared_var):
        # symbolic slice selecting minibatch number `index`
        return shared_var[index * batch_size:(index + 1) * batch_size]

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32 * 32 * 3,
        n_hidden=n_hidden,
        n_out=10,
        n_hiddenLayers=n_hiddenLayers
    )

    # negative log likelihood plus the L1 and L2 regularization terms
    nll = classifier.negative_log_likelihood(y)
    cost = nll + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    # error-rate functions on the test and validation splits
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={x: minibatch(test_set_x), y: minibatch(test_set_y)}
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={x: minibatch(valid_set_x), y: minibatch(valid_set_y)}
    )

    # one gradient per parameter, then one plain SGD rule per pair
    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # returns the cost and applies `updates` as a side effect
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={x: minibatch(train_set_x), y: minibatch(train_set_y)}
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)

    # inputs, predictions, labels and class probabilities of a test batch
    log_layer = classifier.logRegressionLayer
    filter_model = theano.function(
        inputs=[index],
        outputs=[x, log_layer.y_pred, y, log_layer.p_y_given_x],
        givens={x: minibatch(test_set_x), y: minibatch(test_set_y)}
    )

    filter_output = [filter_model(i) for i in range(n_test_batches)]

    test_output = None
    expected_distribution = None

    for batch in filter_output:
        # only minibatches predicted without any mistake are of interest
        if not numpy.array_equal(batch[1], batch[2]):
            continue

        sample_x = batch[0]
        sample_y = batch[1]
        expected_distribution = batch[3]
        print("successfully classified sample ", sample_y)

        t_sample_x, t_sample_y = shared_dataset((sample_x, sample_y))

        # shift every input by 0.1 along the sign of the cost gradient
        grad_input = classifier.input + 0.1 * T.sgn(
            T.grad(cost, classifier.input))
        grad_input_fn = theano.function(
            inputs=[],
            outputs=grad_input,
            givens={x: t_sample_x, y: t_sample_y}
        )
        gradient = grad_input_fn()

        # classify the shifted inputs
        new_t_sample_x, t_sample_y = shared_dataset((gradient, sample_y))
        testing_gradient = theano.function(
            inputs=[],
            outputs=[y, log_layer.y_pred, log_layer.p_y_given_x],
            givens={x: new_t_sample_x, y: t_sample_y}
        )
        test_output = testing_gradient()

        # stop as soon as the shift changed at least one prediction
        if not numpy.array_equal(test_output[0], test_output[1]):
            break

    return test_output, expected_distribution
def test_para_num(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],L1_reg=0.00, L2_reg=0.0001,
                  batch_size=128, n_hiddenLayers=2,verbose=True):
    """
    Train a two-stage ConvNet and then a ``myMLP`` on the same data.

    The CNN has two LeNetConvPoolLayer stages (5x5 filters, 2x2 pooling),
    a sigmoid HiddenLayer of 500 units and a LogisticRegression output.
    The MLP is built afterwards with hidden sizes ``nkerns[0] * 14 * 14``
    and ``nkerns[1] * 5 * 5`` and a cost that adds L1/L2 regularization.
    Both models are trained through ``train_nn``; nothing is returned.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient) for both models

    :type n_epochs: int
    :param n_epochs: forwarded to ``train_nn`` for both models

    :type nkerns: list of ints
    :param nkerns: number of kernels on each convolutional layer

    :type L1_reg: float
    :param L1_reg: weight of ``classifier.L1`` in the MLP cost

    :type L2_reg: float
    :param L2_reg: weight of ``classifier.L2_sqr`` in the MLP cost

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: forwarded to ``myMLP``

    :type verbose: boolean
    :param verbose: forwarded to ``train_nn``
    """

    rng = numpy.random.RandomState(23455)

    data = load_data()
    train_set_x, train_set_y = data[0]
    valid_set_x, valid_set_y = data[1]
    test_set_x, test_set_y = data[2]

    # number of whole minibatches available in each split
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # symbolic inputs shared by both models
    index = T.lscalar()  # minibatch number
    x = T.matrix('x')    # rasterized images, one per row
    y = T.ivector('y')   # integer labels

    def minibatch(shared_var):
        # symbolic slice selecting minibatch number `index`
        return shared_var[index * batch_size: (index + 1) * batch_size]

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    ###########################################################################
    ################################## CNN ####################################
    ###########################################################################

    # (batch_size, 3 * 32 * 32) rows -> 4D tensor for the conv layers
    images = x.reshape((batch_size, 3, 32, 32))

    # first convolution + pooling stage
    layer0 = LeNetConvPoolLayer(
        rng,
        input=images,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(2, 2)
    )

    # second stage; its input side is (32 - 5 + 1) / 2 = 14
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 14, 14),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # fully-connected sigmoidal layer on one flattened row per example;
    # the incoming side is (14 - 5 + 1) / 2 = 5
    layer2 = HiddenLayer(
        rng,
        input=layer1.output.flatten(2),
        n_in=nkerns[1] * 5 * 5,
        n_out=500,
        activation=T.nnet.sigmoid
    )

    # 10-way classifier on top of the hidden layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # training objective of the CNN: negative log likelihood
    cnn_cost = layer3.negative_log_likelihood(y)

    cnn_test = theano.function(
        [index],
        layer3.errors(y),
        givens={x: minibatch(test_set_x), y: minibatch(test_set_y)}
    )

    cnn_validate = theano.function(
        [index],
        layer3.errors(y),
        givens={x: minibatch(valid_set_x), y: minibatch(valid_set_y)}
    )

    # every trainable CNN parameter, classifier first
    cnn_params = layer3.params + layer2.params + layer1.params + layer0.params
    cnn_grads = T.grad(cnn_cost, cnn_params)

    # one plain SGD rule per (parameter, gradient) pair
    cnn_updates = [
        (p, p - learning_rate * g) for p, g in zip(cnn_params, cnn_grads)
    ]

    cnn_train = theano.function(
        [index],
        cnn_cost,
        updates=cnn_updates,
        givens={x: minibatch(train_set_x), y: minibatch(train_set_y)}
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    train_nn(cnn_train, cnn_validate, cnn_test,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)

    ###########################################################################
    ################################## MLP ####################################
    ###########################################################################

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # hidden sizes derived from the CNN's kernel counts and map sizes
    n_hidden = [nkerns[0] * 14 * 14, nkerns[1] * 5 * 5]

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # negative log likelihood plus the L1 and L2 regularization terms
    mlp_nll = classifier.negative_log_likelihood(y)
    mlp_cost = mlp_nll + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    mlp_test = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={x: minibatch(test_set_x), y: minibatch(test_set_y)}
    )

    mlp_validate = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={x: minibatch(valid_set_x), y: minibatch(valid_set_y)}
    )

    # one gradient per parameter, then one plain SGD rule per pair
    mlp_grads = [T.grad(mlp_cost, param) for param in classifier.params]
    mlp_updates = [
        (param, param - learning_rate * grad)
        for param, grad in zip(classifier.params, mlp_grads)
    ]

    # returns the cost and applies `mlp_updates` as a side effect
    mlp_train = theano.function(
        inputs=[index],
        outputs=mlp_cost,
        updates=mlp_updates,
        givens={x: minibatch(train_set_x), y: minibatch(train_set_y)}
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    train_nn(mlp_train, mlp_validate, mlp_test,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)
def MY_lenet(learning_rate=0.1, n_epochs=200, nkerns=[20, 50],
             batch_size=500, L1_reg=0.00, L2_reg=0.0001):
    """
    Train a two-stage ConvNet with a dropout MLP head on flip-augmented data.

    The raw training set is loaded as numpy arrays, one flipped copy of
    every image (produced by ``flip_image``) is appended to it, and all three
    splits are then wrapped in Theano shared variables. The network is two
    conv/pool stages (with a ``BatchNormalization`` layer in between), four
    ``DropoutHiddenLayer`` stages and a ``LogisticRegression`` output with 10
    classes. Parameters are updated by SGD with momentum.

    :type learning_rate: float
    :param learning_rate: factor applied to the momentum buffer in each
                          parameter update

    :type n_epochs: int
    :param n_epochs: forwarded to ``train_nn``

    :type nkerns: list of ints
    :param nkerns: number of kernels of the first and second conv layer

    :type batch_size: int
    :param batch_size: number of examples in a minibatch

    :type L1_reg: float
    :param L1_reg: weight of the L1 term (fully-connected and output
                   weights only) in the cost

    :type L2_reg: float
    :param L2_reg: weight of the squared L2 term (fully-connected and output
                   weights only) in the cost
    """
    rng = numpy.random.RandomState(23455)

    ds_rate = None
    datasets = load_data(ds_rate=ds_rate, theano_shared=False)

    train_set_x, train_set_y = datasets[0]
    train_size = train_set_x.shape
    n_train = train_size[0]

    # Only the flip augmentation is active; each row is a rasterized
    # (3, 32, 32) image that is converted to (32, 32, 3) for flip_image and
    # back again afterwards.
    print('... Fliping images')
    train_set_x_flip = np.empty(train_size)
    for i in range(n_train):
        img = (np.reshape(train_set_x[i], (3, 32, 32))).transpose(1, 2, 0)
        img_tran = flip_image(img)
        train_set_x_flip[i] = np.reshape(img_tran.transpose(2, 0, 1),
                                         (3 * 32 * 32))

    # Original images followed by their flipped copies; the labels are
    # repeated in the same order.
    train_set_x = np.concatenate((train_set_x, train_set_x_flip), axis=0)
    train_set_y = np.concatenate((train_set_y, train_set_y), axis=0)

    datasets[0] = [train_set_x, train_set_y]

    train_set_x, train_set_y = shared_dataset(datasets[0])
    valid_set_x, valid_set_y = shared_dataset(datasets[1])
    test_set_x, test_set_y = shared_dataset(datasets[2])

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    # pseudo boolean for switching between training and prediction
    training_enabled = T.iscalar('training_enabled')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # First convolutional pooling layer: 3x3 filters, 2x2 pooling. The next
    # stage is declared with 15x15 feature maps, i.e. (32 - 3 + 1) / 2.
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, 3, 3),
        poolsize=(2, 2)
    )

    layerbn = BatchNormalization(
        input_shape=(batch_size, nkerns[0], 15, 15),
        mode=1,
        momentum=0.9
    )
    layerbn_output = layerbn.get_result(layer0.output)

    # Second convolutional pooling layer: 3x3 filters, 2x2 pooling. The
    # fully-connected stage below is sized for 6x6 maps, i.e.
    # (15 - 3 + 1) // 2.
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layerbn_output,
        image_shape=(batch_size, nkerns[0], 15, 15),
        filter_shape=(nkerns[1], nkerns[0], 3, 3),
        poolsize=(2, 2)
    )

    # The fully-connected layers operate on 2D matrices of shape
    # (batch_size, num_pixels), so the conv output is flattened to
    # (batch_size, nkerns[1] * 6 * 6).
    layer2_input = layer1.output.flatten(2)

    # fully-connected dropout layers with ReLU activation
    layer2 = DropoutHiddenLayer(
        rng,
        is_train=training_enabled,
        input=layer2_input,
        n_in=nkerns[1] * 6 * 6,
        n_out=4096,
        activation=T.nnet.relu
    )

    layer3 = DropoutHiddenLayer(
        rng,
        is_train=training_enabled,
        input=layer2.output,
        n_in=4096,
        n_out=2048,
        activation=T.nnet.relu
    )

    layer4 = DropoutHiddenLayer(
        rng,
        is_train=training_enabled,
        input=layer3.output,
        n_in=2048,
        n_out=1024,
        activation=T.nnet.relu
    )

    layer5 = DropoutHiddenLayer(
        rng,
        is_train=training_enabled,
        input=layer4.output,
        n_in=1024,
        n_out=512,
        activation=T.nnet.relu
    )

    # classify the values of the last fully-connected layer
    layer6 = LogisticRegression(input=layer5.output, n_in=512, n_out=10)

    # L1 norm ; one regularization option is to enforce L1 norm to
    # be small
    L1 = (abs(layer2.W).sum() + abs(layer3.W).sum() + abs(layer4.W).sum()
          + abs(layer5.W).sum() + abs(layer6.W).sum())

    # square of L2 norm ; one regularization option is to enforce
    # square of L2 norm to be small
    L2_sqr = ((layer2.W**2).sum() + (layer3.W**2).sum()
              + (layer4.W**2).sum() + (layer5.W**2).sum()
              + (layer6.W**2).sum())

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (layer6.negative_log_likelihood(y)
            + L1_reg * L1
            + L2_reg * L2_sqr)

    # create functions to compute the mistakes that are made by the model;
    # training_enabled is pinned to 0 for evaluation
    test_model = theano.function(
        [index],
        layer6.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        }
    )

    validate_model = theano.function(
        [index],
        layer6.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    # NOTE(review): layerbn is not part of this list; if BatchNormalization
    # owns trainable parameters they are never updated -- confirm.
    params = (layer6.params + layer5.params + layer4.params + layer3.params
              + layer2.params + layer1.params + layer0.params)

    # compute the gradient of cost with respect to every parameter once;
    # the list is reused by the momentum updates below
    gparams = [T.grad(cost, param) for param in params]

    # SGD with momentum: every parameter gets a zero-initialised buffer of
    # the same shape. The parameter steps along the buffer's previous value
    # while the buffer itself is blended with the fresh gradient.
    momentum = theano.shared(numpy.cast[theano.config.floatX](0.5),
                             name='momentum')
    updates = []
    for param, gparam in zip(params, gparams):
        param_update = theano.shared(
            param.get_value() * numpy.cast[theano.config.floatX](0.))
        updates.append((param, param - learning_rate * param_update))
        updates.append((
            param_update,
            momentum * param_update
            + (numpy.cast[theano.config.floatX](1.) - momentum) * gparam
        ))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](1)
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose=True)
def test_data_augmentation(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                           n_epochs=100, batch_size=128, n_hidden=500,
                           n_hiddenLayers=3, verbose=False):
    """
    Wrapper function for experiment of data augmentation

    The down-sampled training set is extended with four translated copies
    of itself before an MLP is trained on it.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
                   regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
                   regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: number of examples in minibatch

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
                     number of units in each hidden layers, and its length
                     should equal to n_hiddenLayers

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to

    :return: whatever ``train_nn`` returns
    """
    # Load the smaller dataset in raw format (numpy.ndarray, not
    # Theano.shared), since we need to preprocess it.
    # train_set, valid_set, test_set format: [input, target]
    # input is a numpy.ndarray of 2 dimensions (a matrix), where each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (vector) that has the same length as the number of rows in the input.
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    # Repeat the labels 5 times: once for the original images and once for
    # each of the four translated copies stacked below.
    train_set[1] = numpy.tile(train_set[1], 5)

    # Translate the dataset in four directions.
    # NOTE(review): the direction keys presumably follow the WASD layout
    # (up / down / right / left, matching the variable suffixes) -- confirm
    # against translate_image.
    train_set_x_u = translate_image(train_set[0], "w")
    train_set_x_d = translate_image(train_set[0], "s")
    train_set_x_r = translate_image(train_set[0], "d")
    train_set_x_l = translate_image(train_set[0], "a")

    # Stack the original dataset and the synthesized datasets
    train_set[0] = numpy.vstack((train_set[0],
                                 train_set_x_u,
                                 train_set_x_d,
                                 train_set_x_r,
                                 train_set_x_l))

    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32 * 32 * 3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling Theano functions that compute the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain SGD: one (variable, update expression) pair per parameter
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    output = train_nn(train_model, validate_model, test_model,
                      n_train_batches, n_valid_batches, n_test_batches,
                      n_epochs, verbose)
    return output
def test_adversarial_example(learning_rate=0.1, L1_reg=0.00, L2_reg=0.0001,
                             n_epochs=100, batch_size=128, n_hidden=500,
                             n_hiddenLayers=3, verbose=True,
                             smaller_set=True):
    """
    Wrapper function for testing adversarial examples

    An MLP is trained, then test example number 3 is perturbed in place by
    ``0.01 * sign(d cost / d x)`` and the predicted label is printed before
    and after the perturbation. The shared test set is modified as a side
    effect, and two matplotlib figures (original and perturbed image) are
    created but not explicitly shown.

    NOTE(review): a second ``def test_adversarial_example`` appears later in
    this file; at import time the later definition replaces this one.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: number of examples in minibatch

    :type n_hidden: int or list of ints
    :param n_hidden: forwarded to ``myMLP``

    :type n_hiddenLayers: int
    :param n_hiddenLayers: forwarded to ``myMLP``

    :type verbose: boolean
    :param verbose: forwarded to ``train_nn``

    :type smaller_set: boolean
    :param smaller_set: if true, load the data with ``ds_rate=5``
    """
    # load the dataset; download the dataset if it is not present
    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling Theano functions that compute the mistakes that are made
    # by the model on a minibatch
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain SGD: one (variable, update expression) pair per parameter
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # gradient of the cost with respect to the input image
    gx = T.grad(cost, x)

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)

    # input gradient for a single test example
    input_grad = theano.function(
        inputs=[index],
        outputs=gx,
        givens={
            x: test_set_x[index: (index + 1)],
            y: test_set_y[index: (index + 1)]
        }
    )

    ind_oi = 3  # index of the test example that gets perturbed

    from matplotlib import pyplot as plt
    plt.figure()
    plt.imshow(
        test_set_x.get_value()[ind_oi, :].reshape(3, 32, 32).transpose((1, 2, 0)))

    # predicted label for a single test example
    predict_label = theano.function(
        inputs=[index],
        outputs=classifier.logRegressionLayer.y_pred,
        givens={
            x: test_set_x[index: (index + 1)]
        }
    )
    print('predicted number original: %i' % predict_label(ind_oi))

    # Add a perturbation to row ind_oi of the shared test set in place, so
    # that predict_label sees the adversarial image on its next call.
    Y = T.matrix()
    X_update = (test_set_x,
                T.inc_subtensor(test_set_x[ind_oi:(ind_oi + 1)], Y))
    perturb = theano.function([Y], updates=[X_update])

    # step in the direction of the sign of the input gradient
    perturb(0.01 * numpy.sign(input_grad(ind_oi)))
    print('predicted number adverserial: %i' % predict_label(ind_oi))

    plt.figure()
    plt.imshow(
        test_set_x.get_value()[ind_oi, :].reshape(3, 32, 32).transpose((1, 2, 0)))
def test_adv_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                 n_epochs=100, batch_size=128, n_hidden=500,
                 n_hiddenLayers=3, verbose=False, smaller_set=True):
    """
    Train an MLP and visualise an adversarial perturbation of a test image.

    After training, the gradient of the cost with respect to the input is
    evaluated on the first training example, scaled by 0.05 and added to the
    first test example. The original image, the perturbed image, the
    training image and the gradient are plotted, followed by bar charts of
    ``classifier.predictions()`` for the original and the perturbed input.
    The call blocks in ``plt.show()``.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: number of examples in minibatch

    :type n_hidden: int or list of ints
    :param n_hidden: forwarded to ``myMLP``

    :type n_hiddenLayers: int
    :param n_hiddenLayers: forwarded to ``myMLP``

    :type verbose: boolean
    :param verbose: forwarded to ``train_nn``

    :type smaller_set: boolean
    :param smaller_set: if true, load the data with ``ds_rate=5``
    """
    # load the dataset; download the dataset if it is not present
    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32 * 32 * 3,
        n_hidden=n_hidden,
        n_out=10,
        n_hiddenLayers=n_hiddenLayers
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling Theano functions that compute the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain SGD: one (variable, update expression) pair per parameter
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)

    # gradient of the cost with respect to the network input, compiled as a
    # function of explicit (x, y) arrays
    g_adv = T.grad(cost, classifier.input)
    gradient = theano.function(inputs=[x, y], outputs=g_adv)

    # Rows are rasterized (3, 32, 32) images; move channels last for imshow.
    img = test_set_x[0, :].eval().reshape((3, 32, 32)).transpose(1, 2, 0)
    img_add = train_set_x[0, :].eval().reshape((3, 32, 32)).transpose(1, 2, 0)

    # The perturbation is the gradient at the first *training* example,
    # added to the first *test* example.
    g = gradient(train_set_x[0:1].eval(), train_set_y[0:1].eval())
    img_grad = g.reshape((3, 32, 32)).transpose(1, 2, 0)

    test_set_x_adv = test_set_x[0, :] + 0.05 * g
    test_set_x_adv = test_set_x_adv.reshape(g.shape)
    # evaluate the perturbed row once; it is used for the image and for the
    # prediction below
    adv_values = test_set_x_adv.eval()
    img_adv = adv_values.reshape((3, 32, 32)).transpose(1, 2, 0)

    # Imported here so the plotting section does not rely on a module-level
    # name; harmless if pyplot is already imported by the file.
    from matplotlib import pyplot as plt

    # Plot image, adversarial image, added image and added image gradient
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 4, 1)
    plt.title('Original Image')
    plt.axis('off')
    plt.imshow(img)

    plt.subplot(1, 4, 2)
    plt.title('Adversarial Image')
    plt.axis('off')
    plt.imshow(img_adv)

    plt.subplot(1, 4, 3)
    plt.title('Added Image')
    plt.axis('off')
    plt.imshow(img_add)

    plt.subplot(1, 4, 4)
    plt.title('Image Gradient')
    plt.axis('off')
    plt.imshow(img_grad)

    plt.tight_layout()

    predict = theano.function(inputs=[x], outputs=classifier.predictions())

    # Plot bar graphs; a separate name is used for the class ids so the
    # symbolic input `x` is not rebound to a Python list.
    class_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    plt.figure(figsize=(8, 6))
    plt.subplot(1, 2, 1)
    plt.bar(class_ids, predict(test_set_x[0:1].eval())[0])
    plt.title('Original Predicted Probabilities')

    plt.subplot(1, 2, 2)
    plt.bar(class_ids, predict(adv_values)[0])
    plt.title('Adversarial Predicted Probabilities')
    plt.tight_layout()

    plt.show()
def test_noise_injection_at_weight(learning_rate=0.1, L1_reg=0.00,
                                   L2_reg=0.0001, n_epochs=100,
                                   batch_size=128, n_hidden=500,
                                   n_hiddenLayers=3, verbose=True,
                                   noise_level=0.001, noise_dist='uniform'):
    """
    Wrapper function for experiment of noise injection at weights

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
                   regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
                   regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: number of examples in minibatch

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
                     number of units in each hidden layers, and its length
                     should equal to n_hiddenLayers

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to

    :param noise_level: forwarded to ``noise_injection`` as its second
                        argument

    :param noise_dist: forwarded to ``noise_injection`` as its third
                       argument
    """
    # Load the smaller dataset
    datasets = load_data(ds_rate=5)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling Theano functions that compute the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # classifier.params is laid out as [W, b, W, b, ...] (one pair per
    # layer), so even positions are weights and odd positions are biases.
    #
    # The two groups are built as separate lists and then concatenated.
    # Appending the bias list inside the weight comprehension's iterable
    # would feed the already-built bias update expressions through the
    # weight rule (and fails outright where zip() is not a list).
    #
    # NOTE(review): noise_injection is called here, while the graph is being
    # built, on the parameters' current values; presumably it returns a
    # numpy array, in which case the same perturbation is added at every
    # training step rather than fresh noise -- confirm this is intended.
    weight_updates = [
        (param,
         param - learning_rate * gparam
         + noise_injection(param.get_value(), noise_level, noise_dist))
        for param, gparam in zip(classifier.params[0::2], gparams[0::2])
    ]
    bias_updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params[1::2], gparams[1::2])
    ]
    updates = weight_updates + bias_updates

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)
def test_emotionTraining(learning_rate=0.1, n_epochs=1000,
                         nkerns=[16, 512, 20], batch_size=200, verbose=True):
    """
    Wrapper function for training a three-stage ConvNet on the dataset
    returned by ``loadData`` (inputs are reshaped to 1 x 48 x 48 images and
    classified into 7 classes).

    After training, the ``W`` and ``b`` values of all five layers are
    pickled to ``layer<i>.W`` / ``layer<i>.b`` in the current working
    directory and the class probabilities of validation minibatch 1 are
    printed.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch; it is also used as
                       the width of the fully-connected hidden layer

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to
    """
    rng = numpy.random.RandomState(23455)

    datasets = loadData()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################

    # Make learning rate a theano shared variable; it is handed to train_nn
    # below.
    learning_rate = theano.shared(learning_rate)

    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 1 * 48 * 48)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 48, 48))

    # First convolutional pooling layer: 3x3 filters, 2x2 pooling; the next
    # stage is declared with 23x23 maps, i.e. (48 - 3 + 1) / 2.
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 48, 48),
        filter_shape=(nkerns[0], 1, 3, 3),
        poolsize=(2, 2)
    )

    # Second convolutional pooling layer: 4x4 filters, 2x2 pooling; the next
    # stage is declared with 10x10 maps, i.e. (23 - 4 + 1) / 2.
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 23, 23),
        filter_shape=(nkerns[1], nkerns[0], 4, 4),
        poolsize=(2, 2)
    )

    # Third convolutional pooling layer: 3x3 filters, 2x2 pooling; the
    # hidden layer below is sized for 4x4 maps, i.e. (10 - 3 + 1) / 2.
    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], 10, 10),
        filter_shape=(nkerns[2], nkerns[1], 3, 3),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4).
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected tanh layer
    # NOTE(review): the number of hidden units is tied to batch_size;
    # presumably a fixed width was intended -- confirm.
    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=nkerns[2] * 4 * 4,
        n_out=batch_size,
        activation=T.tanh
    )

    # classify the values of the fully-connected layer
    layer4 = LogisticRegression(input=layer3.output, n_in=batch_size, n_out=7)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create functions to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # list of all model parameters to be fit by gradient descent
    params = (layer0.params + layer1.params + layer2.params
              + layer3.params + layer4.params)

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD. The updates list is created by looping over all
    # (params[i], grads[i]) pairs.
    # NOTE(review): get_value().item() reads the shared learning rate once,
    # while the graph is built, so the step size is a constant in the
    # compiled function; a later set_value() on learning_rate (e.g. inside
    # train_nn, not visible here) would not change these updates -- confirm
    # whether that is intended.
    updates = [
        (param_i, param_i - learning_rate.get_value().item() * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # class probabilities for one validation minibatch
    getPofYGivenX = theano.function(
        [index],
        layer4.pOfYGivenX(),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        },
        on_unused_input='ignore'
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, learning_rate, verbose)

    print('Training the model complete')

    # Save every layer's W and b to 'layer<i>.W' / 'layer<i>.b'. The context
    # manager closes each file even if pickling raises.
    trained_layers = (layer0, layer1, layer2, layer3, layer4)
    for layer_idx, layer in enumerate(trained_layers):
        for attr_name in ('W', 'b'):
            file_name = 'layer%d.%s' % (layer_idx, attr_name)
            with open(file_name, 'wb') as f1:
                cPickle.dump(getattr(layer, attr_name).get_value(), f1,
                             protocol=cPickle.HIGHEST_PROTOCOL)

    print("Saving the model complete")

    predictedList = getPofYGivenX(1)
    print("List of probabilities predicted = " + str(predictedList))
def test_adversarial_example(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                             n_epochs=100, batch_size=128, n_hidden=500,
                             n_hiddenLayers=3, verbose=False,
                             smaller_set=True):
    """
    Wrapper function for testing adversarial examples

    Trains an MLP with minibatch SGD, perturbs every test image by adding
    the sign of the gradient of the cost with respect to the input, and
    plots up to five correctly classified test images next to their
    perturbed versions together with the predicted class probabilities.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layers, and its length should equal to
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type smaller_set: boolean
    :param smaller_set: to use the smaller dataset or not to.

    :returns: class probabilities predicted for the perturbed test images,
    stacked over all evaluated test minibatches (an empty array when there
    is no complete test minibatch).
    """
    # load the dataset; download the dataset if it is not present
    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    def batch_of(data):
        # symbolic slice selecting minibatch number `index` from `data`
        return data[index * batch_size:(index + 1) * batch_size]

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(rng=rng, input=x, n_in=32*32*3, n_hidden=n_hidden,
                       n_out=10, n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling Theano functions that compute the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: batch_of(test_set_x),
            y: batch_of(test_set_y)
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: batch_of(valid_set_x),
            y: batch_of(valid_set_y)
        }
    )

    # gradient of the cost with respect to every model parameter
    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain SGD: one (variable, update expression) pair per parameter
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # `train_model` returns the cost and, at the same time, updates the
    # parameters of the model according to `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: batch_of(train_set_x),
            y: batch_of(train_set_y)
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)

    ############################
    # PREDICT ON ORIGINAL DATA #
    ############################
    y_pred_model = theano.function(
        inputs=[index],
        outputs=classifier.y_pred,
        givens={
            x: batch_of(test_set_x),
        }
    )

    p_y_given_x_model = theano.function(
        inputs=[index],
        outputs=classifier.p_y_given_x,
        givens={
            x: batch_of(test_set_x),
        }
    )

    # Evaluate the test data once instead of once per minibatch.
    test_images = test_set_x.eval()
    test_labels = test_set_y.eval()

    # Only complete minibatches are predicted, so only the first `n_used`
    # test examples have a prediction.
    n_used = n_test_batches * batch_size

    # The leading empty float array keeps the float dtype (and the
    # empty-result behaviour) that repeated numpy.append calls produced.
    y_pred = numpy.concatenate(
        [numpy.array([])] + [y_pred_model(i) for i in range(n_test_batches)]
    )
    y_actual = numpy.concatenate([numpy.array([]), test_labels[:n_used]])

    # Formatted so the output is the same under Python 2 and Python 3.
    print('y_pred %s' % (y_pred,))
    print('y_actual %s' % (y_actual,))

    ################################
    # BUILD ADVERSARIAL TEST INPUT #
    ################################
    # Move every input one unit along the sign of d(cost)/d(input).
    grad_input = T.grad(cost, classifier.input)
    perturb = theano.function(
        inputs=[x, y],
        outputs=T.add(x, T.sgn(grad_input))
    )
    adversarial_images = numpy.asarray(perturb(test_images, test_labels),
                                       dtype=theano.config.floatX)
    new_x = theano.shared(adversarial_images, borrow=True)

    y_pred_model_adverse = theano.function(
        inputs=[index],
        outputs=classifier.y_pred,
        givens={
            x: batch_of(new_x),
        }
    )

    p_y_given_x_model_adverse = theano.function(
        inputs=[index],
        outputs=classifier.p_y_given_x,
        givens={
            x: batch_of(new_x),
        }
    )

    # Collect per-batch results and stack once after the loop.
    adverse_preds = []
    adverse_probs = []
    original_probs = []
    for i in range(n_test_batches):
        adverse_preds.append(y_pred_model_adverse(i))
        adverse_probs.append(p_y_given_x_model_adverse(i))
        original_probs.append(p_y_given_x_model(i))

    y_pred_adverse = numpy.concatenate([numpy.array([])] + adverse_preds)
    if adverse_probs:
        p_y_given_x_adverse = numpy.vstack(adverse_probs)
        p_y_given_x_original = numpy.vstack(original_probs)
    else:
        p_y_given_x_adverse = numpy.array([])
        p_y_given_x_original = numpy.array([])

    ########
    # PLOT #
    ########
    # Mask of correctly classified examples. It has `n_used` entries, so the
    # image arrays are truncated to the same length before it is applied.
    correct = (y_actual == y_pred)
    correct_pred = y_pred[correct]
    correct_pred_adverse = y_pred_adverse[correct]
    correct_pyx = p_y_given_x_original[correct]
    correct_pyx_adverse = p_y_given_x_adverse[correct]
    correct_images = test_images[:n_used][correct]
    correct_images_adverse = adversarial_images[:n_used][correct]

    # Plot at most five examples; fewer when fewer were classified correctly.
    n_examples = min(5, len(correct_pred))
    classes = numpy.arange(0, 10)

    fig, ax = plt.subplots(5, 4, figsize=(15, 15))
    for i in range(n_examples):
        pred = correct_pred[i]
        pred_adv = correct_pred_adverse[i]
        pyx = correct_pyx[i]
        pyx_adverse = correct_pyx_adverse[i]
        img = correct_images[i, :].reshape(3, 32, 32)
        img_adverse = correct_images_adverse[i, :].reshape(3, 32, 32)

        # imshow expects (height, width, channel)
        ax[i, 0].imshow(numpy.transpose(img, (1, 2, 0)))
        ax[i, 0].axis('off')
        ax[i, 0].set_title('Example %s:\nCorrectly predicted value: %s' % (i+1,int(pred)))

        ax[i, 1].imshow(numpy.transpose(img_adverse, (1, 2, 0)))
        ax[i, 1].axis('off')
        ax[i, 1].set_title('Example %s:\nAdversarial example\nPredicted value: %s' % (i+1, int(pred_adv)))

        ax[i, 2].bar(classes - 0.5, pyx)
        ax[i, 2].set_xticks(classes)
        ax[i, 2].set_title('Example %s: Class specific\nprobabilities for original data' % (i+1))
        ax[i, 2].set_ylabel('p(y|x)')

        ax[i, 3].bar(classes - 0.5, pyx_adverse)
        ax[i, 3].set_xticks(classes)
        ax[i, 3].set_title('Example %s: Class specific\nprobabilities for adversarial data' % (i+1))
        ax[i, 3].set_ylabel('p(y|x)')

    plt.tight_layout()

    return p_y_given_x_adverse
def test_dropout(learning_rate=0.1, n_epochs=1000, nkerns=[64, 128],
                 batch_size=120, verbose=False):
    """
    Wrapper function for testing a LeNet-style network with a DropOut
    layer on SVHN dataset

    The network is two convolution/max-pooling layers, one DropOut layer
    and a logistic regression classifier, trained with minibatch SGD.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer (read-only here)

    :type batch_size: int
    :param batch_size: number of examples in minibatch. It is also used as
    the number of output units of the DropOut layer.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    # Flag handed to the DropOut layer. `getTestValue` is an identity
    # function used to turn a Python int into a value usable in `givens`.
    # NOTE(review): assumes DropOut treats 1 as "evaluation" and 0 as
    # "training"; confirm against the DropOut class.
    testing = T.iscalar('testing')
    testValue = testing
    getTestValue = theano.function([testing], testValue)

    def batch_of(data):
        # symbolic slice selecting minibatch number `index` from `data`
        return data[index * batch_size:(index + 1) * batch_size]

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # first convolutional pooling layer: 5x5 filters, 2x2 pooling
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 3, 32, 32),
                                filter_shape=(nkerns[0], 3, 5, 5),
                                poolsize=(2, 2))

    # second convolutional pooling layer, fed with 14x14 feature maps
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 14, 14),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the fully-connected layer operates on 2D matrices of shape
    # (batch_size, num_pixels), so the feature maps are flattened
    layer2_input = layer1.output.flatten(2)

    # fully-connected layer with dropout
    layer2 = DropOut(rng,
                     input=layer2_input,
                     n_in=nkerns[1] * 5 * 5,
                     n_out=batch_size,
                     testing=testing)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output,
                                n_in=batch_size,
                                n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # functions computing the mistakes made by the model; the dropout
    # flag is fixed to 1 for evaluation
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: batch_of(test_set_x),
            y: batch_of(test_set_y),
            testing: getTestValue(1)
        },
        on_unused_input='ignore')

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: batch_of(valid_set_x),
            y: batch_of(valid_set_y),
            testing: getTestValue(1)
        },
        on_unused_input='ignore')

    # list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # list of gradients for all model parameters
    grads = T.grad(cost, params)

    # SGD update rule, built by looping over all (param, grad) pairs
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    # The training function gets flag 0. Previously it was compiled with
    # the same flag value (1) as the evaluation functions, so the DropOut
    # layer was given a single constant mode everywhere.
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: batch_of(train_set_x),
            y: batch_of(train_set_y),
            testing: getTestValue(0)
        },
        on_unused_input='ignore',
        allow_input_downcast=True)

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)
def test_data_augmentation(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                           n_epochs=100, batch_size=128, n_hidden=500,
                           n_hiddenLayers=3, verbose=False):
    """
    Wrapper function for experiment of data augmentation

    The down-sampled training set is extended with four translated copies
    of itself (five times the original size in total) before an MLP is
    trained on it with minibatch SGD.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layers, and its length should equal to
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """
    # Load the smaller dataset in raw format (numpy.ndarray, not
    # theano.shared), since it has to be preprocessed first.
    # Each of train_set, valid_set, test_set is a pair (input, target):
    # input is a 2-dimensional numpy.ndarray where each row corresponds to
    # an example; target is a 1-dimensional numpy.ndarray with as many
    # entries as the input has rows.
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    raw_x, raw_y = train_set[0], train_set[1]

    # Translate the images once per direction code 1..4.
    # NOTE(review): presumably up, down, right, left in that order; confirm
    # against translate_image.
    translated = [translate_image(raw_x, direction)
                  for direction in (1, 2, 3, 4)]

    # Stack the original images and the four synthesized copies, and repeat
    # the labels five times to match. A new pair is built so the object
    # returned by load_data is left untouched.
    augmented_train_set = (
        numpy.vstack([raw_x] + translated),
        numpy.tile(raw_y, 5),
    )

    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(augmented_train_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    def batch_of(data):
        # symbolic slice selecting minibatch number `index` from `data`
        return data[index * batch_size:(index + 1) * batch_size]

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling Theano functions that compute the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: batch_of(test_set_x),
            y: batch_of(test_set_y)
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: batch_of(valid_set_x),
            y: batch_of(valid_set_y)
        }
    )

    # gradient of the cost with respect to every model parameter
    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain SGD: one (variable, update expression) pair per parameter
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # `train_model` returns the cost and, at the same time, updates the
    # parameters of the model according to `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: batch_of(train_set_x),
            y: batch_of(train_set_y)
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             batch_size=20, n_hidden=500, verbose=True,
             fileName='predictionsMLP'):
    """
    Wrapper function for testing an MLP with a DropConnect layer on SVHN
    dataset

    The network is two tanh hidden layers, one DropConnect layer and a
    logistic regression classifier. It is trained with minibatch SGD with
    momentum, and the object returned by ``train_nn`` is pickled to
    ``fileName``.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient). It is wrapped in a Theano shared variable which is handed
    to ``train_nn``.

    :type L1_reg: float
    :param L1_reg: not used by this function.

    :type L2_reg: float
    :param L2_reg: not used by this function.

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: number of examples in minibatch. It is also used as
    the number of output units of the DropConnect layer.

    :type n_hidden: int
    :param n_hidden: number of units in each of the two tanh hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type fileName: str
    :param fileName: path of the file the pickled predictions are written
    to.
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    learning_rate = theano.shared(learning_rate)

    # Flag handed to the DropConnect layer: 0 is given to the training
    # function, 1 to the evaluation functions. `getTestValue` is an identity
    # function used to turn a Python int into a value usable in `givens`.
    testing = T.lscalar('testing')
    testValue = testing
    getTestValue = theano.function([testing], testValue)

    def batch_of(data):
        # symbolic slice selecting minibatch number `index` from `data`
        return data[index * batch_size:(index + 1) * batch_size]

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape to (batch_size, 3, 32, 32) and flatten back to 2D; the input
    # must therefore hold exactly batch_size images of 3 * 32 * 32 values.
    layer0_input = x.reshape((batch_size, 3, 32, 32))
    layer0_input = layer0_input.flatten(2)

    # two fully-connected tanh layers
    layer0 = HiddenLayer(rng,
                         input=layer0_input,
                         n_in=32 * 32 * 3,
                         n_out=n_hidden,
                         activation=T.tanh)

    layer1 = HiddenLayer(rng,
                         input=layer0.output,
                         n_in=n_hidden,
                         n_out=n_hidden,
                         activation=T.tanh)

    # fully-connected layer with DropConnect
    layer2 = DropConnect(rng,
                         input=layer1.output,
                         n_in=n_hidden,
                         n_out=batch_size,
                         testing=testing)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output,
                                n_in=batch_size,
                                n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    print("Model building complete")

    # functions evaluating the model on a minibatch (flag fixed to 1)
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: batch_of(test_set_x),
            y: batch_of(test_set_y),
            testing: getTestValue(1)
        },
        on_unused_input='ignore')

    getPredictedValue = theano.function(
        [index],
        layer3.predictedValue(),
        givens={
            x: batch_of(test_set_x),
            y: batch_of(test_set_y),
            testing: getTestValue(1)
        },
        on_unused_input='ignore')

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: batch_of(valid_set_x),
            y: batch_of(valid_set_y),
            testing: getTestValue(1)
        },
        on_unused_input='ignore')

    # list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # list of gradients for all model parameters
    grads = T.grad(cost, params)

    # SGD with momentum. Every parameter gets a zero-initialised shared
    # "velocity" that tracks an exponential moving average of its gradient;
    # the parameter itself moves along the velocity.
    #
    # NOTE: the learning rate and the DropConnect masks are read with
    # get_value() while the graph is built, so they enter the compiled
    # updates as constants. Later set_value() calls on those shared
    # variables do not change the compiled training function.
    momentum = 0.9
    step_size = learning_rate.get_value().item()
    updates = []
    for param, grad in zip(params, grads):
        param_update = theano.shared(param.get_value() * 0.,
                                     broadcastable=param.broadcastable)
        if param.name == 'WDrop':
            # masked step for the DropConnect weights
            step = step_size * layer2.maskW.get_value() * param_update
        elif param.name == 'bDrop':
            # masked step for the DropConnect biases
            step = step_size * layer2.maskb.get_value() * param_update
        else:
            step = step_size * param_update
        updates.append((param, param - step))
        updates.append(
            (param_update,
             momentum * param_update + (1. - momentum) * grad))

    print("Commpiling the train model function")

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: batch_of(train_set_x),
            y: batch_of(train_set_y),
            testing: getTestValue(0)
        },
        on_unused_input='ignore',
        allow_input_downcast=True)

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    predictions = train_nn(train_model, validate_model, test_model,
                           getPredictedValue,
                           n_train_batches, n_valid_batches, n_test_batches,
                           n_epochs, learning_rate, verbose)

    # `with` closes the file even when pickling fails
    with open(fileName, 'wb') as f:
        cPickle.dump(predictions, f, protocol=cPickle.HIGHEST_PROTOCOL)
def test_adversarial_example(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                             n_epochs=100, batch_size=128, n_hidden=500,
                             n_hiddenLayers=3, verbose=False,
                             epsilon=0.007, idx=13):
    """
    Wrapper function for testing adversarial examples

    Trains an MLP on the small dataset, then perturbs the whole test set by
    ``epsilon`` times the sign of the gradient of the cost with respect to
    the input and classifies the perturbed images with the trained network.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units, passed on to myMLP.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type epsilon: float
    :param epsilon: factor applied to the sign of the input gradient when
    building the perturbed test set.

    :type idx: int
    :param idx: index of the test example whose results are returned. The
    default 13 is the 14th test example, which the original author found
    to be classified correctly.

    :returns: tuple ``(ori_prob[idx], ori_y[idx], adver_prob[idx],
    adver_y[idx], test_x, new_test_x)``: the class probabilities and label
    of the chosen original example, the class probabilities and predicted
    class of its perturbed version, the original test inputs and the
    perturbed test inputs.
    """
    # First, train a network using the small dataset.
    train_set, valid_set, test_set = load_data(ds_rate=5)

    test_set_x, test_set_y = test_set
    valid_set_x, valid_set_y = valid_set
    train_set_x, train_set_y = train_set

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    def batch_of(data):
        # symbolic slice selecting minibatch number `index` from `data`
        return data[index * batch_size:(index + 1) * batch_size]

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling Theano functions that compute the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: batch_of(test_set_x),
            y: batch_of(test_set_y)
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: batch_of(valid_set_x),
            y: batch_of(valid_set_y)
        }
    )

    # class probabilities (and the labels) for the whole test set
    probability = theano.function(
        inputs=[],
        outputs=[classifier.logRegressionLayer.p_y_given_x, y],
        givens={
            x: test_set_x,
            y: test_set_y
        }
    )

    # the whole test set moved by epsilon along the sign of d(cost)/d(input)
    perturbed_input = theano.function(
        inputs=[],
        outputs=classifier.input
        + epsilon * T.sgn(T.grad(cost, classifier.input)),
        givens={
            x: test_set_x,
            y: test_set_y
        }
    )

    # gradient of the cost with respect to every model parameter
    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain SGD: one (variable, update expression) pair per parameter
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # `train_model` returns the cost and, at the same time, updates the
    # parameters of the model according to `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: batch_of(train_set_x),
            y: batch_of(train_set_y)
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)

    # Predictions of the trained network on the unmodified test set.
    ori_prob, ori_y = probability()

    # Perturbed test set, computed with the trained parameters.
    new_test_x = perturbed_input()

    adversarial = theano.function(
        inputs=[],
        outputs=[classifier.logRegressionLayer.p_y_given_x,
                 classifier.logRegressionLayer.y_pred,
                 y],
        givens={
            x: new_test_x,
            y: test_set_y
        }
    )
    adver_prob, adver_y, _ = adversarial()

    return (ori_prob[idx], ori_y[idx], adver_prob[idx], adver_y[idx],
            test_set_x.get_value(borrow=True), new_test_x)
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
             batch_size=128, n_hidden=500, n_hiddenLayers=3,
             verbose=False, smaller_set=True):
    """
    Train and test an MLP classifier with minibatch SGD.

    :type learning_rate: float
    :param learning_rate: step size applied to each gradient.

    :type L1_reg: float
    :param L1_reg: weight of the L1 penalty added to the cost.

    :type L2_reg: float
    :param L2_reg: weight of the squared L2 penalty added to the cost.

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples per minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it gives the number
    of units of each hidden layer and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: whether to print an epoch summary.

    :type smaller_set: boolean
    :param smaller_set: whether to load the down-sampled dataset
    (``ds_rate=5``) instead of the full one.

    :returns: whatever ``train_nn`` returns.
    """
    # Fetch the data (down-sampled on request).
    datasets = load_data(ds_rate=5) if smaller_set else load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Number of complete minibatches available in each split.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Symbolic inputs: minibatch number, rasterized images, integer labels.
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    def minibatch(data):
        # Rows of `data` that belong to minibatch number `index`.
        return data[index * batch_size:(index + 1) * batch_size]

    def compile_error_function(data_x, data_y):
        # Function mapping a minibatch number to the model's error on it.
        return theano.function(
            inputs=[index],
            outputs=classifier.errors(y),
            givens={x: minibatch(data_x), y: minibatch(data_y)})

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(rng=rng,
                       input=x,
                       n_in=32 * 32 * 3,
                       n_hidden=n_hidden,
                       n_out=10,
                       n_hiddenLayers=n_hiddenLayers)

    # Training objective: negative log likelihood plus both penalties.
    nll = classifier.negative_log_likelihood(y)
    l1_penalty = L1_reg * classifier.L1
    l2_penalty = L2_reg * classifier.L2_sqr
    cost = nll + l1_penalty + l2_penalty

    test_model = compile_error_function(test_set_x, test_set_y)
    validate_model = compile_error_function(valid_set_x, valid_set_y)

    # One SGD rule per parameter: step against that parameter's gradient.
    updates = []
    for param in classifier.params:
        gparam = T.grad(cost, param)
        updates.append((param, param - learning_rate * gparam))

    # Returns the cost of a minibatch and applies `updates` as a side effect.
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={x: minibatch(train_set_x), y: minibatch(train_set_y)})

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    result = train_nn(train_model, validate_model, test_model,
                      n_train_batches, n_valid_batches, n_test_batches,
                      n_epochs, verbose)
    return result
def test_mlp_bonus(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                   n_epochs=100, batch_size=128, n_hidden=500,
                   n_hiddenLayers=3, verbose=False, smaller_set=True):
    """
    Build an MLP, train it with minibatch SGD and return its weights.

    :type learning_rate: float
    :param learning_rate: factor applied to every gradient in the SGD update.

    :type L1_reg: float
    :param L1_reg: weight of ``classifier.L1`` in the cost.

    :type L2_reg: float
    :param L2_reg: weight of ``classifier.L2_sqr`` in the cost.

    :type n_epochs: int
    :param n_epochs: forwarded to ``train_nn`` (maximal number of epochs).

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units, forwarded to ``myMLP``. If a
        list, it gives the units of each hidden layer and its length should
        equal ``n_hiddenLayers``.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers, forwarded to ``myMLP``.

    :type verbose: boolean
    :param verbose: forwarded to ``train_nn`` (print epoch summaries or not).

    :type smaller_set: boolean
    :param smaller_set: when true the data is loaded with ``ds_rate=5``.

    :return: list with ``params[0].get_value()`` of every entry of
        ``classifier.hiddenLayers`` followed by the same value of
        ``classifier.logRegressionLayer``.
    """
    # load the dataset (the reduced variant is requested through ds_rate)
    datasets = load_data(ds_rate=5) if smaller_set else load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    def batch_count(shared_x):
        # number of complete minibatches contained in a shared data matrix
        return shared_x.get_value(borrow=True).shape[0] // batch_size

    n_train_batches = batch_count(train_set_x)
    n_valid_batches = batch_count(valid_set_x)
    n_test_batches = batch_count(test_set_x)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # symbolic placeholders
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # rasterized images
    y = T.ivector('y')   # 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32 * 32 * 3,
        n_hidden=n_hidden,
        n_out=10,
        n_hiddenLayers=n_hiddenLayers
    )

    # negative log likelihood plus the L1 / L2 regularization terms
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    def minibatch(set_x, set_y):
        # substitution of x / y by the `index`-th minibatch of a dataset
        first = index * batch_size
        last = (index + 1) * batch_size
        return {x: set_x[first:last], y: set_y[first:last]}

    # functions measuring the mistakes made on one minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=minibatch(test_set_x, test_set_y)
    )
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=minibatch(valid_set_x, valid_set_y)
    )

    # plain SGD: one (variable, new value) pair per model parameter
    updates = []
    for param in classifier.params:
        gparam = T.grad(cost, param)
        updates.append((param, param - learning_rate * gparam))

    # returns the cost and applies `updates` on every call
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens=minibatch(train_set_x, train_set_y)
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)

    learned = [layer.params[0].get_value()
               for layer in classifier.hiddenLayers]
    learned.append(classifier.logRegressionLayer.params[0].get_value())
    return learned
def test_filter(learning_rate=0.1, n_epochs=1000, nkerns=[3, 512],
                batch_size=200, verbose=True):
    """
    Compare a fully trained LeNet with one whose first-layer filters are
    fixed Gaussian kernels, and plot the first-layer filters of both.

    The network (two conv/pool layers, one sigmoidal hidden layer, logistic
    regression) is built and trained twice on the data from ``load_data``:
    first with every parameter trained, then with the first convolutional
    layer frozen to Gaussian filters rescaled to the mean of the trained
    first-layer weights.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
        gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each convolutional layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # (number of filters, num input feature maps, filter height, filter width)
    filter_shape_input = (nkerns[0], 3, 5, 5)

    def minibatch(set_x, set_y):
        # substitution of x / y by the `index`-th minibatch of a dataset
        return {
            x: set_x[index * batch_size: (index + 1) * batch_size],
            y: set_y[index * batch_size: (index + 1) * batch_size]
        }

    def build_and_train(fixed_filters=None):
        """Build the network, train it and return its first conv layer.

        When ``fixed_filters`` is given, it is written into the first
        layer's weights and that layer is excluded from the SGD updates.
        """
        layer0 = LeNetConvPoolLayer(
            rng,
            input=layer0_input,
            # (batch size, num input feature maps, image height, image width)
            image_shape=(batch_size, 3, 32, 32),
            filter_shape=filter_shape_input,
            poolsize=(2, 2)
        )
        if fixed_filters is not None:
            # Overwrite the *contents* of the existing shared variable.
            # Rebinding the attribute (layer0.W = ...) would leave the
            # already-built layer0.output pointing at the old weights.
            # NOTE(review): assumes layer0.output was built from the shared
            # variable layer0.W (Theano tutorial layer) -- confirm.
            layer0.W.set_value(
                numpy.asarray(fixed_filters, dtype=layer0.W.dtype))

        layer1 = LeNetConvPoolLayer(
            rng,
            input=layer0.output,
            # (32-5+1)/2 = 14
            image_shape=(batch_size, nkerns[0], 14, 14),
            filter_shape=(nkerns[1], nkerns[0], 5, 5),
            poolsize=(2, 2)
        )

        # the HiddenLayer being fully-connected, it operates on 2D matrices
        # of shape (batch_size, num_pixels)
        layer2 = HiddenLayer(
            rng,
            input=layer1.output.flatten(2),
            # (14-5+1)/2 = 5
            n_in=nkerns[1] * 5 * 5,
            n_out=500,
            activation=T.nnet.sigmoid
        )

        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

        # the cost we minimize during training is the NLL of the model
        cost = layer3.negative_log_likelihood(y)

        test_model = theano.function(
            [index], layer3.errors(y),
            givens=minibatch(test_set_x, test_set_y)
        )
        validate_model = theano.function(
            [index], layer3.errors(y),
            givens=minibatch(valid_set_x, valid_set_y)
        )

        # parameters fitted by gradient descent; the first conv layer is
        # left out when its filters are pre-defined
        params = layer3.params + layer2.params + layer1.params
        if fixed_filters is None:
            params = params + layer0.params

        grads = T.grad(cost, params)
        updates = [
            (param_i, param_i - learning_rate * grad_i)
            for param_i, grad_i in zip(params, grads)
        ]

        train_model = theano.function(
            [index], cost, updates=updates,
            givens=minibatch(train_set_x, train_set_y)
        )

        ###############
        # TRAIN MODEL #
        ###############
        print('... training')
        train_nn(train_model, validate_model, test_model,
                 n_train_batches, n_valid_batches, n_test_batches,
                 n_epochs, verbose)
        return layer0

    def plot_filters(layer, title):
        # one row per kernel, one column per input channel
        weights = layer.W.get_value()
        plt.figure()
        for knkerns0 in range(nkerns[0]):
            for kch in range(3):
                plt.subplot(nkerns[0], 3, knkerns0 * 3 + kch + 1)
                plt.imshow(weights[knkerns0, kch, :, :])
                plt.title(title)

    # ---- first run: every layer is trained ----
    layer0 = build_and_train()
    mean_w_0 = layer0.W.get_value().mean()
    plot_filters(layer0, 'trained filter')

    # ---- second run: first layer fixed to Gaussian filters ----
    from scipy.ndimage import gaussian_filter

    # unit impulse at the filter centre; floor division keeps the index an
    # integer under true division as well
    pt_input = numpy.zeros((filter_shape_input[2], filter_shape_input[3]))
    pt_input[(filter_shape_input[2] - 1) // 2,
             (filter_shape_input[3] - 1) // 2] = 1.0

    W = numpy.zeros(filter_shape_input)
    for knkerns0 in range(nkerns[0]):
        # kernel k is a Gaussian of sigma k+1, rescaled so that its mean
        # equals the mean of the trained first-layer weights; the same
        # kernel is used for all input channels
        kernel = gaussian_filter(pt_input, (knkerns0 + 1.0))
        W[knkerns0, :, :, :] = kernel / kernel.mean() * mean_w_0

    layer0 = build_and_train(fixed_filters=W)
    plot_filters(layer0, 'pre-defined filter')
def test_dropconnect3(learning_rate=0.1, n_epochs=1000, nkerns=[16, 64, 20],
                      batch_size=20, verbose=True,
                      fileName='predictionsDropConnect3_Cifar',
                      activation=tanh, fullyconnected=300, p=0.5):
    """
    Train a three-stage ConvNet topped by a DropConnect layer on the data
    returned by ``load_data_cifar`` and pickle the predictions.

    :type learning_rate: float
    :param learning_rate: initial learning rate; it is wrapped in a Theano
        shared variable that is also handed to ``train_nn``

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each convolutional layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type fileName: str
    :param fileName: path of the file the value returned by ``train_nn``
        is pickled to

    :param activation: activation passed to the ``DropConnect`` layer (the
        convolutional layers always use ``tanh``)

    :type fullyconnected: int
    :param fullyconnected: number of output units of the DropConnect layer

    :param p: forwarded to ``DropConnect`` as ``p``
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data_cifar()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    learning_rate = theano.shared(learning_rate)

    # flag given to the DropConnect layer: 1 in the evaluation functions,
    # 0 in the training function
    testing = T.iscalar('testing')
    getTestValue = theano.function([testing], testing)
    eval_flag = getTestValue(1)
    train_flag = getTestValue(0)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(2, 2),
        activation=tanh
    )

    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 14, 14),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2),
        activation=tanh
    )

    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], 5, 5),
        filter_shape=(nkerns[2], nkerns[1], 2, 2),
        poolsize=(2, 2),
        activation=tanh
    )

    # the DropConnect layer operates on 2D matrices of shape
    # (batch_size, num_pixels)
    layer3_input = layer2.output.flatten(2)

    layer3 = DropConnect(
        rng,
        input=layer3_input,
        n_in=nkerns[2] * 2 * 2,
        n_out=fullyconnected,
        testing=testing,
        activation=activation,
        p=p
    )

    layer4 = LogisticRegression(
        input=layer3.output,
        n_in=fullyconnected,
        n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    print("Model building complete")

    def minibatch(set_x, set_y, flag):
        # substitution of x / y by the `index`-th minibatch plus the value
        # of the `testing` flag
        return {
            x: set_x[index * batch_size: (index + 1) * batch_size],
            y: set_y[index * batch_size: (index + 1) * batch_size],
            testing: flag
        }

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens=minibatch(test_set_x, test_set_y, eval_flag),
        on_unused_input='ignore'
    )

    getPredictedValue = theano.function(
        [index],
        layer4.predictedValue(),
        givens=minibatch(test_set_x, test_set_y, eval_flag),
        on_unused_input='ignore'
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens=minibatch(valid_set_x, valid_set_y, eval_flag),
        on_unused_input='ignore'
    )

    # list of all model parameters to be fit by gradient descent
    params = (layer4.params + layer3.params + layer2.params
              + layer1.params + layer0.params)

    # gradients for all model parameters, computed once
    grads = T.grad(cost, params)

    # SGD with a running average of the gradient (one `velocity` shared
    # variable per parameter).
    updates = []
    momentum = 0.9
    for param, grad in zip(params, grads):
        velocity = theano.shared(param.get_value() * 0.,
                                 broadcastable=param.broadcastable)
        # Use the shared learning rate symbolically so that changes made to
        # it while training reach the compiled function; baking in
        # `.get_value().item()` would freeze it at its initial value. The
        # cast keeps the update expression at the parameter's dtype.
        step = T.cast(learning_rate, param.dtype)
        if param.name == 'WDrop':
            # NOTE(review): the mask is read once here, at graph
            # construction; confirm that a snapshot is what is intended.
            updates.append(
                (param, param - step * layer3.maskW.get_value() * velocity))
        elif param.name == 'bDrop':
            updates.append(
                (param, param - step * layer3.maskb.get_value() * velocity))
        else:
            updates.append((param, param - step * velocity))
        updates.append(
            (velocity, momentum * velocity + (1. - momentum) * grad))

    print("Commpiling the train model function")
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens=minibatch(train_set_x, train_set_y, train_flag),
        on_unused_input='ignore',
        allow_input_downcast=True
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    predictions = train_nn(train_model, validate_model, test_model,
                           getPredictedValue, n_train_batches,
                           n_valid_batches, n_test_batches, n_epochs,
                           learning_rate, verbose)

    # the context manager closes the file even if pickling fails
    with open(fileName, 'wb') as f:
        cPickle.dump(predictions, f, protocol=cPickle.HIGHEST_PROTOCOL)
def test_gaussian(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],
                  batch_size=200, verbose=False):
    """
    Train a LeNet whose first convolutional layer is fixed to Gaussian
    filters; only the remaining layers are fitted.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
        gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each convolutional layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # First convolutional pooling layer:
    # filtering reduces the image size to (32-5+1, 32-5+1) = (28, 28)
    # maxpooling reduces this further to (28/2, 28/2) = (14, 14)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(2, 2)
    )

    # Load the Gaussian kernel into the layer's existing shared variables.
    # Rebinding layer0.W / layer0.b to new objects would not change the
    # graph behind layer0.output, so the values are written with set_value.
    # NOTE(review): assumes make_Gaussian(size=5) returns a 5x5 array and
    # that layer0.W / layer0.b are the shared variables used by
    # layer0.output -- confirm against their definitions.
    gauss = numpy.asarray(make_Gaussian(size=5))
    layer0.W.set_value(
        numpy.tile(gauss, (nkerns[0], 3, 1, 1)).astype(layer0.W.dtype))
    layer0.b.set_value(numpy.zeros((nkerns[0],), dtype=layer0.b.dtype))

    # Second convolutional pooling layer:
    # filtering reduces the image size to (14-5+1, 14-5+1) = (10, 10)
    # maxpooling reduces this further to (10/2, 10/2) = (5, 5)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 14, 14),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 5 * 5,
        n_out=500,
        activation=T.tanh
    )

    layer3 = LogisticRegression(
        input=layer2.output,
        n_in=500,
        n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    def minibatch(set_x, set_y):
        # substitution of x / y by the `index`-th minibatch of a dataset
        return {
            x: set_x[index * batch_size: (index + 1) * batch_size],
            y: set_y[index * batch_size: (index + 1) * batch_size]
        }

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens=minibatch(test_set_x, test_set_y)
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens=minibatch(valid_set_x, valid_set_y)
    )

    # parameters fitted by gradient descent; layer0 is deliberately left
    # out so its Gaussian filters stay fixed
    params = layer3.params + layer2.params + layer1.params

    # create a list of gradients for all fitted parameters
    grads = T.grad(cost, params)

    # plain SGD over all (params[i], grads[i]) pairs
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens=minibatch(train_set_x, train_set_y)
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)
def test_lenet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],
               batch_size=200, filter_size=5, dnn_layers=1, n_hidden=500,
               gabor=False, lmbda=None, verbose=False):
    """
    Train a LeNet (two conv/pool layers, a stack of tanh hidden layers and
    a logistic regression) on the data returned by ``load_data``.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
        gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each convolutional layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type filter_size: int
    :param filter_size: height and width of the convolution filters

    :type dnn_layers: int
    :param dnn_layers: number of fully-connected hidden layers; with 0 the
        logistic regression is fed the flattened convolutional output

    :type n_hidden: int or iterable of ints
    :param n_hidden: units per hidden layer; an iterable must have
        ``dnn_layers`` entries, a scalar is repeated ``dnn_layers`` times

    :type gabor: boolean
    :param gabor: when ``True`` the first layer receives the output of
        ``build_gabor`` as ``weights`` and is excluded from training

    :param lmbda: forwarded to ``build_gabor``

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """
    # %-formatting prints the same text under Python 2 and Python 3
    print('%s %s %s %s %s' % (test_lenet.__name__, nkerns, filter_size,
                              gabor, lmbda))

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    if gabor is True:
        # Generate Gabor filters and repeat each one for the 3 channels
        filters = build_gabor(filter_size, nkerns[0], lmbda)
        filters = numpy.array([filters[i] for i in range(len(filters))])
        filter_weights = numpy.tile(filters, (1, 3, 1)).reshape(
            nkerns[0], 3, filter_size, filter_size)
        layer0 = LeNetConvPoolLayer(
            rng,
            input=layer0_input,
            image_shape=(batch_size, 3, 32, 32),
            filter_shape=(nkerns[0], 3, filter_size, filter_size),
            poolsize=(2, 2),
            weights=filter_weights)
        print('gabor filter weights are working')
    else:
        layer0 = LeNetConvPoolLayer(
            rng,
            input=layer0_input,
            image_shape=(batch_size, 3, 32, 32),
            filter_shape=(nkerns[0], 3, filter_size, filter_size),
            poolsize=(2, 2))

    # side length after the first conv + 2x2 pooling; floor division keeps
    # it an integer under true division as well
    i_s_1 = (32 - filter_size + 1) // 2
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], i_s_1, i_s_1),
        filter_shape=(nkerns[1], nkerns[0], filter_size, filter_size),
        poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # side length after the second conv + 2x2 pooling
    i_s_2 = (i_s_1 - filter_size + 1) // 2

    if hasattr(n_hidden, '__iter__'):
        assert (len(n_hidden) == dnn_layers)
    else:
        n_hidden = (n_hidden, ) * dnn_layers

    # Stack the hidden layers, tracking the output and width of the
    # current top of the stack so the classifier does not depend on a
    # leaked loop index.
    DNN_Layers = []
    top_output = layer2_input
    top_width = nkerns[1] * i_s_2 * i_s_2
    for i in range(dnn_layers):
        hidden = HiddenLayer(rng=rng,
                             input=top_output,
                             n_in=top_width,
                             n_out=n_hidden[i],
                             activation=T.tanh)
        DNN_Layers.append(hidden)
        top_output = hidden.output
        top_width = n_hidden[i]

    # classify the values of the last fully-connected layer
    LR_Layer = LogisticRegression(input=top_output,
                                  n_in=top_width,
                                  n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = LR_Layer.negative_log_likelihood(y)

    def minibatch(set_x, set_y):
        # substitution of x / y by the `index`-th minibatch of a dataset
        return {
            x: set_x[index * batch_size:(index + 1) * batch_size],
            y: set_y[index * batch_size:(index + 1) * batch_size]
        }

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        LR_Layer.errors(y),
        givens=minibatch(test_set_x, test_set_y))

    validate_model = theano.function(
        [index],
        LR_Layer.errors(y),
        givens=minibatch(valid_set_x, valid_set_y))

    # List of all model parameters to be fit by gradient descent. Start
    # from a copy: extending LR_Layer.params itself with += would modify
    # the layer's own list.
    params = list(LR_Layer.params)
    for layer in DNN_Layers:
        params += layer.params
    if gabor is True:
        # the Gabor first layer is not trained
        print('gabor params is workings')
        params += layer1.params
    else:
        params += layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # plain SGD over all (params[i], grads[i]) pairs
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens=minibatch(train_set_x, train_set_y))

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)
def test_CDNN(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],
              batch_size=200, n_hidden=[200, 200, 200], verbose=True):
    """
    Wrapper function for testing a CNN in cascade with a DNN: two conv/pool
    layers, three sigmoidal hidden layers and a logistic regression.

    :type learning_rate: float
    :param learning_rate: factor applied to every gradient in the SGD update

    :type n_epochs: int
    :param n_epochs: forwarded to ``train_nn`` (maximal number of epochs)

    :type nkerns: list of ints
    :param nkerns: number of kernels of the two convolutional layers

    :type batch_size: int
    :param batch_size: number of examples in a minibatch

    :type n_hidden: list of ints
    :param n_hidden: output sizes of the three hidden layers (entries 0-2
        are read)

    :type verbose: boolean
    :param verbose: forwarded to ``train_nn``
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    def batch_count(shared_x):
        # number of complete minibatches contained in a shared data matrix
        return shared_x.get_value(borrow=True).shape[0] // batch_size

    n_train_batches = batch_count(train_set_x)
    n_valid_batches = batch_count(valid_set_x)
    n_test_batches = batch_count(test_set_x)

    # symbolic placeholders
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # rasterized images
    y = T.ivector('y')   # 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # (batch_size, 3 * 32 * 32) matrix -> 4D tensor for the conv layers
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        # (batch size, num input feature maps, image height, image width)
        image_shape=(batch_size, 3, 32, 32),
        # (number of filters, num input feature maps, filter h, filter w)
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(2, 2)
    )

    # second convolutional pooling layer; input side is (32-5+1)/2 = 14
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 14, 14),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # fully-connected layers work on (batch_size, num_pixels) matrices
    layer2_input = layer1.output.flatten(2)

    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 5 * 5,
        n_out=n_hidden[0],
        activation=T.nnet.sigmoid
    )
    layer3 = HiddenLayer(
        rng,
        input=layer2.output,
        n_in=n_hidden[0],
        n_out=n_hidden[1],
        activation=T.nnet.sigmoid
    )
    layer4 = HiddenLayer(
        rng,
        input=layer3.output,
        n_in=n_hidden[1],
        n_out=n_hidden[2],
        activation=T.nnet.sigmoid
    )
    layer5 = LogisticRegression(
        input=layer4.output,
        n_in=n_hidden[2],
        n_out=10
    )

    # training objective: negative log likelihood of the classifier
    cost = layer5.negative_log_likelihood(y)

    def minibatch(set_x, set_y):
        # substitution of x / y by the `index`-th minibatch of a dataset
        first = index * batch_size
        last = (index + 1) * batch_size
        return {x: set_x[first:last], y: set_y[first:last]}

    # functions measuring the mistakes made on one minibatch
    test_model = theano.function(
        [index],
        layer5.errors(y),
        givens=minibatch(test_set_x, test_set_y)
    )
    validate_model = theano.function(
        [index],
        layer5.errors(y),
        givens=minibatch(valid_set_x, valid_set_y)
    )

    # every parameter of the model, classifier first
    params = (layer5.params + layer4.params + layer3.params
              + layer2.params + layer1.params + layer0.params)

    grads = T.grad(cost, params)

    # plain SGD: one (variable, new value) pair per parameter
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens=minibatch(train_set_x, train_set_y)
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)
def test_lenet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],
               batch_size=200, filter_size=5, dnn_layers=1, n_hidden=500,
               gabor=False, lmbda=None, verbose=False):
    """
    Build and train a LeNet-style network: two convolution/pooling layers,
    a stack of fully-connected tanh layers and a 10-way logistic-regression
    classifier, trained with plain minibatch SGD via ``train_nn``.

    The data comes from ``load_data()`` (SVHN according to the original
    documentation) and every minibatch is reshaped to
    ``(batch_size, 3, 32, 32)``.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels in the first and second convolutional
                   layer. The default list is only read, never mutated.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type filter_size: int
    :param filter_size: side length of the square convolution filters used
                        by both convolutional layers.

    :type dnn_layers: int
    :param dnn_layers: number of fully-connected hidden layers placed
                       between the convolutional stack and the classifier.
                       With 0 the classifier reads the flattened
                       convolutional features directly.

    :type n_hidden: int or iterable of ints
    :param n_hidden: width of the hidden layers. A scalar is used for every
                     layer; an iterable must contain exactly ``dnn_layers``
                     entries.

    :type gabor: boolean
    :param gabor: when exactly ``True``, the first convolutional layer is
                  initialised from ``build_gabor`` filters and its
                  parameters are left out of the SGD updates.

    :param lmbda: forwarded unchanged to ``build_gabor`` (presumably the
                  Gabor wavelength -- confirm against ``build_gabor``).

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :raises ValueError: if ``n_hidden`` is an iterable whose length differs
                        from ``dnn_layers``.
    """
    # A single pre-joined string prints identically whether ``print`` is the
    # Python 2 statement or the Python 3 function.
    print(' '.join(str(v) for v in
                   (test_lenet.__name__, nkerns, filter_size, gabor, lmbda)))

    # Only the literal ``True`` enables the Gabor path (original semantics).
    use_gabor = gabor is True

    # Normalise n_hidden to one width per hidden layer. This is explicit
    # validation instead of ``assert`` so it survives ``python -O``.
    if hasattr(n_hidden, '__iter__'):
        n_hidden = tuple(n_hidden)
        if len(n_hidden) != dnn_layers:
            raise ValueError(
                'n_hidden has %d entries but dnn_layers is %d'
                % (len(n_hidden), dnn_layers))
    else:
        n_hidden = (n_hidden,) * dnn_layers

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    def batch_givens(data_x, data_y):
        # Substitute x / y with the index-th minibatch of the given split.
        start = index * batch_size
        stop = (index + 1) * batch_size
        return {x: data_x[start:stop], y: data_y[start:stop]}

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # First convolutional pooling layer. Both variants share every argument
    # except the optional pre-computed Gabor weights.
    layer0_kwargs = dict(
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, filter_size, filter_size),
        poolsize=(2, 2),
    )
    if use_gabor:
        # Generate Gabor filters and replicate each one across the three
        # input channels.
        filters = build_gabor(filter_size, nkerns[0], lmbda)
        filters = numpy.array([filters[k] for k in range(len(filters))])
        layer0_kwargs['weights'] = numpy.tile(filters, (1, 3, 1)).reshape(
            nkerns[0], 3, filter_size, filter_size)

    layer0 = LeNetConvPoolLayer(rng, **layer0_kwargs)
    if use_gabor:
        print('gabor filter weights are working')

    # Second convolutional pooling layer. Each stage shrinks the image by a
    # valid convolution followed by 2x2 pooling; floor division keeps the
    # sizes integral (and matches test_convnet).
    conv0_out = (32 - filter_size + 1) // 2
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], conv0_out, conv0_out),
        filter_shape=(nkerns[1], nkerns[0], filter_size, filter_size),
        poolsize=(2, 2)
    )
    conv1_out = (conv0_out - filter_size + 1) // 2

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    prev_output = layer1.output.flatten(2)
    prev_width = nkerns[1] * conv1_out * conv1_out

    # Stack of fully-connected tanh layers; each one consumes the previous
    # layer's output.
    hidden_layers = []
    for width in n_hidden:
        hidden = HiddenLayer(
            rng=rng,
            input=prev_output,
            n_in=prev_width,
            n_out=width,
            activation=T.tanh
        )
        hidden_layers.append(hidden)
        prev_output, prev_width = hidden.output, width

    # classify the values of the last fully-connected layer
    LR_Layer = LogisticRegression(
        input=prev_output,
        n_in=prev_width,
        n_out=10
    )

    # the cost we minimize during training is the NLL of the model
    cost = LR_Layer.negative_log_likelihood(y)

    # create functions to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        LR_Layer.errors(y),
        givens=batch_givens(test_set_x, test_set_y)
    )

    validate_model = theano.function(
        [index],
        LR_Layer.errors(y),
        givens=batch_givens(valid_set_x, valid_set_y)
    )

    # Collect all parameters to be fit by gradient descent. Start from a
    # copy so the classifier's own ``params`` list is not extended in place.
    params = list(LR_Layer.params)
    for layer in hidden_layers:
        params.extend(layer.params)
    params.extend(layer1.params)
    if use_gabor:
        # The Gabor-initialised first layer stays fixed during training.
        print('gabor params is workings')
    else:
        params.extend(layer0.params)

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens=batch_givens(train_set_x, train_set_y)
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)