def test_adversarial_example(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                             n_epochs=100, batch_size=128, n_hidden=500,
                             n_hiddenLayers=3, verbose=False, smaller_set=False):
    """Wrapper function for testing adversarial examples."""
    # Load the dataset; download the dataset if it is not present.
    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Compute the number of minibatches for training, validation and testing.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Allocate symbolic variables for the data.
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # Construct the neural network (an MLP here).
    classifier = myMLP(rng=rng, input=x, n_in=32 * 32 * 3, n_hidden=n_hidden,
                       n_out=10, n_hiddenLayers=n_hiddenLayers)

    # The cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); the cost is
    # expressed here symbolically.
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # Compile a Theano function that computes the mistakes made by the model
    # on a minibatch.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Compute the gradient of the cost with respect to theta (stored in
    # params); the resulting gradients are stored in the list gparams.
    gparams = [T.grad(cost, param) for param in classifier.params]

    # Specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs. Given two lists of the same length,
    # A = [a1, a2, a3, a4] and B = [b1, b2, b3, b4], zip generates a list C of
    # the same size, where each element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)].
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # Compile a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`.
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)

    # Compile a function that returns, for each test minibatch, the inputs,
    # the predicted labels, the true labels and the predicted distribution.
    filter_model = theano.function(
        inputs=[index],
        outputs=[x,
                 classifier.logRegressionLayer.y_pred,
                 y,
                 classifier.logRegressionLayer.p_y_given_x],
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    filter_output = [filter_model(i) for i in range(n_test_batches)]

    sample_x = None
    sample_y = None
    test_output = None
    expected_distribution = None

    for i in filter_output:
        # Only consider minibatches on which every example is classified
        # correctly.
        if numpy.array_equal(i[1], i[2]):
            sample_x = i[0]
            sample_y = i[1]
            expected_distribution = i[3]
            print("successfully classified sample ", sample_y)

            t_sample_x, t_sample_y = shared_dataset((sample_x, sample_y))

            # Perturb the input along the sign of the input gradient of the
            # cost (fast gradient sign method).
            grad_input = classifier.input + 0.1 * T.sgn(
                T.grad(cost, classifier.input))
            grad_input_fn = theano.function(
                inputs=[],
                outputs=grad_input,
                givens={
                    x: t_sample_x,
                    y: t_sample_y
                })
            gradient = grad_input_fn()

            new_t_sample_x, t_sample_y = shared_dataset((gradient, sample_y))

            # Re-classify the perturbed inputs.
            testing_gradient = theano.function(
                inputs=[],
                outputs=[y,
                         classifier.logRegressionLayer.y_pred,
                         classifier.logRegressionLayer.p_y_given_x],
                givens={
                    x: new_t_sample_x,
                    y: t_sample_y
                })
            test_output = testing_gradient()

            # Stop as soon as the perturbation changes at least one prediction.
            if not numpy.array_equal(test_output[0], test_output[1]):
                break

    return test_output, expected_distribution
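# The perturbation built in test_adversarial_example is the fast gradient sign
# method (FGSM): x_adv = x + eps * sign(dJ/dx). A minimal NumPy sketch of the
# same step, assuming the input gradient has already been evaluated
# (illustrative only; not part of the Theano graph above):
def fgsm_perturb(x, grad_wrt_x, eps=0.1):
    """Return an adversarially perturbed copy of x, given dJ/dx."""
    return x + eps * numpy.sign(grad_wrt_x)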
def test_data_augmentation(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                           n_epochs=100, batch_size=128, n_hidden=500,
                           n_hiddenLayers=3, verbose=False):
    """
    Wrapper function for the data-augmentation experiment.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: whether to print out an epoch summary.
    """
    rng = numpy.random.RandomState(23455)

    # Load the down-sampled dataset in raw format (numpy.ndarray, not Theano
    # shared variables).
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix), where each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (a vector) that has the same length as the number of rows in the input.

    # Load the smaller dataset in raw format, since we need to preprocess it.
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    # Repeat the training labels 5 times (original plus four translated copies).
    train_set[1] = numpy.tile(train_set[1], 5)

    # Translate the dataset in the four directions (up, down, right, left;
    # WASD-style direction codes).
    train_set_x_u = translate_image(train_set[0], "w")
    train_set_x_d = translate_image(train_set[0], "s")
    train_set_x_r = translate_image(train_set[0], "d")
    train_set_x_l = translate_image(train_set[0], "a")

    # Stack the original dataset and the synthesized datasets.
    train_set[0] = numpy.vstack((train_set[0],
                                 train_set_x_u,
                                 train_set_x_d,
                                 train_set_x_r,
                                 train_set_x_l))

    # Convert the raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # Compute the number of minibatches for training, validation and testing.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # Allocate symbolic variables for the data.
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(rng=rng, input=x, n_in=32 * 32 * 3, n_hidden=n_hidden,
                       n_hiddenLayers=n_hiddenLayers, n_out=10)

    # The cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); the cost is
    # expressed here symbolically.
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # Compile a Theano function that computes the mistakes made by the model
    # on a minibatch.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Compute the gradient of the cost with respect to theta (stored in
    # params); the resulting gradients are stored in the list gparams.
    gparams = [T.grad(cost, param) for param in classifier.params]

    # Specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs. Given two lists of the same length,
    # A = [a1, a2, a3, a4] and B = [b1, b2, b3, b4], zip generates a list C of
    # the same size, where each element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)].
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # Compile a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`.
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    output = train_nn(train_model, validate_model, test_model,
                      n_train_batches, n_valid_batches, n_test_batches,
                      n_epochs, verbose)

    return output
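# For reference, a minimal sketch of what a one-direction image translation
# can look like for rows of flattened 3x32x32 images. This is an illustration
# under assumed conventions (channel-major flattening, zero fill) and is not
# the project's translate_image implementation:
def translate_image_sketch(flat_images, shift=1, vertical=True):
    """Shift every image by `shift` pixels and zero-fill the vacated border."""
    if shift == 0:
        return flat_images.copy()
    imgs = flat_images.reshape(-1, 3, 32, 32)
    axis = 2 if vertical else 3
    shifted = numpy.roll(imgs, shift, axis=axis)
    # Zero out the wrapped-around border so the roll becomes a true translation.
    if vertical:
        if shift > 0:
            shifted[:, :, :shift, :] = 0.
        else:
            shifted[:, :, shift:, :] = 0.
    else:
        if shift > 0:
            shifted[:, :, :, :shift] = 0.
        else:
            shifted[:, :, :, shift:] = 0.
    return shifted.reshape(flat_images.shape[0], -1)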
def MY_lenet(learning_rate=0.1, n_epochs=200, nkerns=[20, 50], batch_size=500,
             L1_reg=0.00, L2_reg=0.0001):
    rng = numpy.random.RandomState(23455)

    ds_rate = None
    datasets = load_data(ds_rate=ds_rate, theano_shared=False)

    train_set_x, train_set_y = datasets[0]
    train_size = train_set_x.shape
    n_train = train_size[0]

    '''
    print('... Translating images')
    train_set_x_tran = np.empty(train_size)
    for i in range(n_train):
        img = (np.reshape(train_set_x[i], (3, 32, 32))).transpose(1, 2, 0)
        img_tran = translate_image(img)
        train_set_x_tran[i] = np.reshape(img_tran.transpose(2, 0, 1), (3 * 32 * 32))

    print('... Rotating images')
    train_set_x_rota = np.empty(train_size)
    for i in range(n_train):
        img = (np.reshape(train_set_x[i], (3, 32, 32))).transpose(1, 2, 0)
        img_tran = rotate_image(img)
        train_set_x_rota[i] = np.reshape(img_tran.transpose(2, 0, 1), (3 * 32 * 32))
    '''

    print('... Flipping images')
    train_set_x_flip = np.empty(train_size)
    for i in range(n_train):
        img = (np.reshape(train_set_x[i], (3, 32, 32))).transpose(1, 2, 0)
        img_tran = flip_image(img)
        train_set_x_flip[i] = np.reshape(img_tran.transpose(2, 0, 1), (3 * 32 * 32))

    '''
    print('... Adding noise to images')
    train_set_x_nois = np.empty(train_size)
    for i in range(n_train):
        img = (np.reshape(train_set_x[i], (3, 32, 32))).transpose(1, 2, 0)
        img_tran = noise_injection(img)
        train_set_x_nois[i] = np.reshape(img_tran.transpose(2, 0, 1), (3 * 32 * 32))
    '''

    # Stack the original images with the flipped copies (the other
    # augmentations above are disabled).
    train_set_x = np.concatenate(
        (train_set_x,
         # train_set_x_tran,
         # train_set_x_rota,
         train_set_x_flip),
        axis=0)
    train_set_y = np.concatenate(
        (train_set_y,
         # train_set_y,
         # train_set_y,
         train_set_y),
        axis=0)

    datasets[0] = [train_set_x, train_set_y]

    train_set_x, train_set_y = shared_dataset(datasets[0])
    valid_set_x, valid_set_y = shared_dataset(datasets[1])
    test_set_x, test_set_y = shared_dataset(datasets[2])

    # Compute the number of minibatches for training, validation and testing.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # Allocate symbolic variables for the data.
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels
    # Pseudo boolean for switching between training and prediction.
    training_enabled = T.iscalar('training_enabled')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape the matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (32-3+1, 32-3+1) = (30, 30);
    # maxpooling reduces this further to (30/2, 30/2) = (15, 15).
    # The 4D output tensor is thus of shape (batch_size, nkerns[0], 15, 15).
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 3, 32, 32),
                                filter_shape=(nkerns[0], 3, 3, 3),
                                poolsize=(2, 2))
    # print('layer0.output.shape =')
    # print(layer0.output.shape.eval({x: np.random.rand(2, 2).astype(dtype=theano.config.floatX)}))

    # Batch-normalize the output of the first layer.
    layerbn = BatchNormalization(input_shape=(batch_size, nkerns[0], 15, 15),
                                 mode=1,
                                 momentum=0.9)
    layerbn_output = layerbn.get_result(layer0.output)
    # print('layerbn_output.shape =')
    # print(layerbn_output.shape.eval({x: np.random.rand(2, 2).astype(dtype=theano.config.floatX)}))

    # Construct the second convolutional pooling layer:
    # filtering reduces the image size to (15-3+1, 15-3+1) = (13, 13);
    # maxpooling reduces this further to (6, 6).
    # The 4D output tensor is thus of shape (batch_size, nkerns[1], 6, 6).
    layer1 = LeNetConvPoolLayer(rng,
                                input=layerbn_output,
                                image_shape=(batch_size, nkerns[0], 15, 15),
                                filter_shape=(nkerns[1], nkerns[0], 3, 3),
                                poolsize=(2, 2))

    # The HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This generates a matrix of shape (batch_size, nkerns[1] * 6 * 6).
    layer2_input = layer1.output.flatten(2)

    # Construct a stack of fully-connected ReLU layers with dropout.
    layer2 = DropoutHiddenLayer(rng,
                                is_train=training_enabled,
                                input=layer2_input,
                                n_in=nkerns[1] * 6 * 6,
                                n_out=4096,
                                activation=T.nnet.relu)

    layer3 = DropoutHiddenLayer(rng,
                                is_train=training_enabled,
                                input=layer2.output,
                                n_in=4096,
                                n_out=2048,
                                activation=T.nnet.relu)

    layer4 = DropoutHiddenLayer(rng,
                                is_train=training_enabled,
                                input=layer3.output,
                                n_in=2048,
                                n_out=1024,
                                activation=T.nnet.relu)

    layer5 = DropoutHiddenLayer(rng,
                                is_train=training_enabled,
                                input=layer4.output,
                                n_in=1024,
                                n_out=512,
                                activation=T.nnet.relu)

    # Classify the values of the last fully-connected layer.
    layer6 = LogisticRegression(input=layer5.output, n_in=512, n_out=10)

    # L1 norm; one regularization option is to enforce the L1 norm to be small.
    L1 = (abs(layer2.W).sum()
          + abs(layer3.W).sum()
          + abs(layer4.W).sum()
          + abs(layer5.W).sum()
          + abs(layer6.W).sum())

    # Square of the L2 norm; one regularization option is to enforce the
    # square of the L2 norm to be small.
    L2_sqr = ((layer2.W ** 2).sum()
              + (layer3.W ** 2).sum()
              + (layer4.W ** 2).sum()
              + (layer5.W ** 2).sum()
              + (layer6.W ** 2).sum())

    # The cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); the cost is
    # expressed here symbolically.
    cost = (layer6.negative_log_likelihood(y)
            + L1_reg * L1
            + L2_reg * L2_sqr)

    # Create a function to compute the mistakes made by the model.
    test_model = theano.function(
        [index],
        layer6.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    validate_model = theano.function(
        [index],
        layer6.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    # Create a list of all model parameters to be fit by gradient descent.
    params = (layer6.params + layer5.params + layer4.params + layer3.params
              + layer2.params + layer1.params + layer0.params)

    '''
    # Create a list of gradients for all model parameters.
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i])
    # pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
    '''

    # Compute the gradient of the cost with respect to theta (stored in
    # params); the resulting gradients are stored in the list gparams.
    gparams = [T.grad(cost, param) for param in params]

    # Specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs, using SGD with momentum: each
    # parameter keeps a shared velocity that is blended with the new gradient.
    momentum = theano.shared(numpy.cast[theano.config.floatX](0.5),
                             name='momentum')
    updates = []
    for param in params:
        param_update = theano.shared(
            param.get_value() * numpy.cast[theano.config.floatX](0.))
        updates.append((param, param - learning_rate * param_update))
        updates.append((param_update,
                        momentum * param_update
                        + (numpy.cast[theano.config.floatX](1.) - momentum)
                        * T.grad(cost, param)))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](1)
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose=True)
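# The update loop in MY_lenet implements SGD with an exponentially averaged
# momentum term:
#     w_new = w - lr * v
#     v_new = mu * v + (1 - mu) * grad
# where both right-hand sides use the old velocity v, matching the
# simultaneous Theano updates above. A minimal NumPy sketch of one such step
# (illustrative only):
def momentum_sgd_step(w, v, grad, lr=0.1, mu=0.5):
    """Return the updated (parameter, velocity) pair for one momentum-SGD step."""
    w_new = w - lr * v
    v_new = mu * v + (1. - mu) * grad
    return w_new, v_new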
def test_data_augmentation(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                           n_epochs=100, batch_size=128, n_hidden=500,
                           n_hiddenLayers=3, verbose=False):
    """
    Wrapper function for the data-augmentation experiment.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: whether to print out an epoch summary.
    """
    rng = numpy.random.RandomState(23455)

    # Load the down-sampled dataset in raw format (numpy.ndarray, not Theano
    # shared variables).
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix), where each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (a vector) that has the same length as the number of rows in the input.

    # Load the smaller dataset in raw format, since we need to preprocess it.
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    # Repeat the training labels 5 times (original plus four translated copies).
    train_set[1] = numpy.tile(train_set[1], 5)

    # Translate the dataset in each of the four directions.
    train_set_x_u = translate_image(train_set[0], 1)
    train_set_x_d = translate_image(train_set[0], 2)
    train_set_x_r = translate_image(train_set[0], 3)
    train_set_x_l = translate_image(train_set[0], 4)

    # Stack the original dataset and the synthesized datasets.
    train_set[0] = numpy.vstack((train_set[0],
                                 train_set_x_u,
                                 train_set_x_d,
                                 train_set_x_r,
                                 train_set_x_l))

    # Convert the raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # Compute the number of minibatches for training, validation and testing.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # Allocate symbolic variables for the data.
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32 * 32 * 3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # The cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); the cost is
    # expressed here symbolically.
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # Compile a Theano function that computes the mistakes made by the model
    # on a minibatch.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # Compute the gradient of the cost with respect to theta (stored in
    # params); the resulting gradients are stored in the list gparams.
    gparams = [T.grad(cost, param) for param in classifier.params]

    # Specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs. Given two lists of the same length,
    # A = [a1, a2, a3, a4] and B = [b1, b2, b3, b4], zip generates a list C of
    # the same size, where each element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)].
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # Compile a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`.
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)
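# Illustrative invocations of the experiment wrappers above (not part of the
# original experiments; the hyperparameter values are only examples):
#
#     test_data_augmentation(learning_rate=0.01, n_epochs=100, verbose=True)
#     test_adversarial_example(smaller_set=True, verbose=True)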
def test_mynet(learning_rate=0.001, n_epochs=80, dataset='mnist.pkl.gz',
               nkerns=[20, 50], batch_size=100,
               flip_p=0, rotate_p=0, translate_p=0, noise_p=0):
    """
    Demonstrates a deeper convolutional network on the CIFAR-10 dataset.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training/testing (unused
    here; the CIFAR-10 data is downloaded below)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """
    rng = numpy.random.RandomState(23455)

    ds_rate = None
    '''
    Loads the CIFAR-10 dataset.

    :type ds_rate: float
    :param ds_rate: downsample rate; should be larger than 1, if provided.

    :type theano_shared: boolean
    :param theano_shared: If true, the function returns the dataset as Theano
    shared variables. Otherwise, the function returns raw data.
    '''
    if ds_rate is not None:
        assert(ds_rate > 1.)

    # Download the CIFAR-10 dataset if it is not present.
    def check_dataset(dataset):
        # Check if the dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data",
            dataset
        )
        # f_name = new_path.replace("src/../data/%s" % dataset, "data/")
        f_name = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data"
        )
        if (not os.path.isfile(new_path)):
            from six.moves import urllib
            origin = (
                'https://www.cs.toronto.edu/~kriz/' + dataset
            )
            print('Downloading data from %s' % origin)
            urllib.request.urlretrieve(origin, new_path)

        tar = tarfile.open(new_path)
        file_names = tar.getnames()
        for file_name in file_names:
            tar.extract(file_name, f_name)
        tar.close()

        return f_name

    f_name = check_dataset('cifar-10-matlab.tar.gz')

    train_batches = os.path.join(f_name, 'cifar-10-batches-mat/data_batch_1.mat')

    # Load the data and convert the data format.
    train_batches = ['data_batch_1.mat', 'data_batch_2.mat', 'data_batch_3.mat',
                     'data_batch_4.mat', 'data_batch_5.mat']
    train_batch = os.path.join(f_name, 'cifar-10-batches-mat', train_batches[0])
    train_set = scipy.io.loadmat(train_batch)
    train_set['data'] = train_set['data'] / 255.
    for i in range(4):
        train_batch = os.path.join(f_name, 'cifar-10-batches-mat',
                                   train_batches[i + 1])
        temp = scipy.io.loadmat(train_batch)
        train_set['data'] = numpy.concatenate(
            (train_set['data'], temp['data'] / 255.), axis=0)
        train_set['labels'] = numpy.concatenate(
            (train_set['labels'].flatten(), temp['labels'].flatten()), axis=0)

    test_batches = os.path.join(f_name, 'cifar-10-batches-mat/test_batch.mat')
    test_set = scipy.io.loadmat(test_batches)
    test_set['data'] = test_set['data'] / 255.
    test_set['labels'] = test_set['labels'].flatten()

    train_set = (train_set['data'], train_set['labels'])
    test_set = (test_set['data'], test_set['labels'])

    # Downsample the training dataset if specified.
    train_set_len = len(train_set[1])
    if ds_rate is not None:
        train_set_len = int(train_set_len // ds_rate)
        train_set = [x[:train_set_len] for x in train_set]

    # Extract the validation dataset from the train dataset.
    valid_set = [x[-(train_set_len // 5):] for x in train_set]
    train_set = [x[:-(train_set_len // 5)] for x in train_set]

    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix) where each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (a vector) that has the same length as the number of rows in the input.
    # It gives the target to the example with the same index in the input.
    theano_shared = True
    if theano_shared:
        test_set_x, test_set_y = shared_dataset(test_set)
        valid_set_x, valid_set_y = shared_dataset(valid_set)
        train_set_x, train_set_y = shared_dataset(train_set)

        rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
                (test_set_x, test_set_y)]
    else:
        rval = [train_set, valid_set, test_set]

    # return rval
    datasets = rval

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Compute the number of minibatches for training, validation and testing.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # Allocate symbolic variables for the data.
    index = T.lscalar()  # index to a [mini]batch
    training_enabled = T.iscalar('training_enabled')

    # start-snippet-1
    mydata = T.matrix('mydata')
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape the matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct a stack of convolutional layers followed by 2x2 max-pooling.
    print(layer0_input.shape)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(128, 3, 3, 3)
    )
    print('layer 0 constructed....')
    print(layer0.output)

    layer01 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, 128, 32, 32),
        filter_shape=(128, 128, 3, 3)
    )

    layer02 = LeNetConvPoolLayer(
        rng,
        input=layer01.output,
        image_shape=(batch_size, 128, 32, 32),
        filter_shape=(32, 128, 3, 3)
    )

    '''
    layer03 = LeNetConvPoolLayer(
        rng,
        input=layer02.output,
        image_shape=(batch_size, 32, 32, 32),
        filter_shape=(32, 32, 5, 5)
    )

    layer04 = LeNetConvPoolLayer(
        rng,
        input=layer03.output,
        image_shape=(batch_size, 32, 32, 32),
        filter_shape=(32, 32, 5, 5)
    )

    layer05 = LeNetConvPoolLayer(
        rng,
        input=layer04.output,
        image_shape=(batch_size, 32, 32, 32),
        filter_shape=(32, 32, 5, 5)
    )
    '''

    layer06 = theano.tensor.signal.pool.pool_2d(layer02.output, (2, 2),
                                                ignore_border=True)
    print('layer 01 constructed....')
    print(layer01)

    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer06,
        image_shape=(batch_size, 32, 16, 16),
        filter_shape=(32, 32, 3, 3)
    )

    layer2 = theano.tensor.signal.pool.pool_2d(layer1.output, (2, 2),
                                               ignore_border=True)

    layer3 = LeNetConvPoolLayer(
        rng,
        input=layer2,
        image_shape=(batch_size, 32, 8, 8),
        filter_shape=(32, 32, 3, 3),
        poolsize=(1, 1)
    )

    layer4 = theano.tensor.signal.pool.pool_2d(layer3.output, (2, 2),
                                               ignore_border=True)

    # The HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This generates a matrix of shape (batch_size, 32 * 4 * 4).
    layer5_input = layer4.flatten(2)

    # Construct fully-connected ReLU layers with dropout.
    layer5 = DropoutHiddenLayer(
        is_train=training_enabled,
        rng=rng,
        input=layer5_input,
        n_in=32 * 4 * 4,
        n_out=4096,
        W=None,
        b=None,
        activation=theano.tensor.nnet.relu,
        p=0.7
    )

    layer6 = DropoutHiddenLayer(
        is_train=training_enabled,
        rng=rng,
        input=layer5.output,
        n_in=4096,
        n_out=512,
        W=None,
        b=None,
        activation=theano.tensor.nnet.relu,
        p=0.7
    )

    layer62 = DropoutHiddenLayer(
        is_train=training_enabled,
        rng=rng,
        input=layer6.output,
        n_in=512,
        n_out=512,
        W=None,
        b=None,
        activation=theano.tensor.nnet.relu,
        p=0.7
    )

    L2_reg = 0.0001

    # Classify the values of the last fully-connected layer.
    layer7 = LogisticRegression(input=layer62.output, n_in=512, n_out=10)

    # The cost we minimize during training is the NLL of the model
    # (the L2 term is computed but currently disabled).
    cost = layer7.negative_log_likelihood(y)
    L2_sqr = (
        (layer7.W ** 2).sum()
    )
    cost = cost  # + L2_sqr

    # Create a function to compute the mistakes made by the model.
    test_model = theano.function(
        [index],
        layer7.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        }
    )

    validate_model = theano.function(
        [index],
        layer7.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        }
    )

    # Create a list of all model parameters to be fit by gradient descent.
    params = (layer62.params + layer6.params + layer7.params + layer5.params
              + layer3.params + layer1.params + layer0.params + layer01.params)
    # + layer02.params + layer03.params + layer04.params + layer05.params

    # Create a list of gradients for all model parameters.
    grads = T.grad(cost, params)

    # train_model updates the model parameters with the Adam optimizer defined
    # below. The Adam implementation is adapted from Alec Radford's code and is
    # distributed under the following license; note that in this formulation
    # b1 and b2 play the role of (1 - beta1) and (1 - beta2) in the Adam paper,
    # and fix1/fix2 are the bias-correction factors.
    """
    The MIT License (MIT)

    Copyright (c) 2015 Alec Radford

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
    in the Software without restriction, including without limitation the rights
    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be included in all
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    SOFTWARE.
    """
    def Adam(cost, params, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
        updates = []
        grads = T.grad(cost, params)
        i = theano.shared(floatX(0.))
        i_t = i + 1.
        fix1 = 1. - (1. - b1) ** i_t
        fix2 = 1. - (1. - b2) ** i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            # Running averages of the gradient (m) and squared gradient (v).
            m = theano.shared(p.get_value() * 0.)
            v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((m, m_t))
            updates.append((v, v_t))
            updates.append((p, p_t))
        updates.append((i, i_t))
        return updates

    updates = Adam(cost, params)

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](1)
        }
    )

    # The same training function, but taking an already augmented minibatch as
    # its input.
    train_model_augmented = theano.function(
        [mydata, index],
        cost,
        updates=updates,
        givens={
            x: mydata,
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](1)
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # Early-stopping parameters.
    patience = 10000               # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                   # go through this many minibatches before
                                   # checking the network on the validation set;
                                   # in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (not done_looping):
        epoch = epoch + 1
        if (epoch >= 50):
            break
        '''
        train_set[0] = noise_image(train_set[0], 0.05)
        train_set[0] = flip_image(train_set[0], 0.5)
        train_set[0] = rotate_image(train_set[0], 0.15)
        train_set[0] = translate_image(train_set[0], 0.15)
        train_set_x, train_set_y = shared_dataset(train_set)
        '''
        for minibatch_index in range(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)

            # Augment the current minibatch on the fly.
            temp_data = train_set_x.get_value()
            data = temp_data[minibatch_index * batch_size:
                             (minibatch_index + 1) * batch_size]
            data = noise_image(data, 0.05)
            data = rotate_image(data, 0.15)
            data = translate_image(data, 0.25)
            data = flip_image(data, 0.5)
            # mydata.set_value(data)
            cost_ij = train_model_augmented(data, minibatch_index)

            if (iter + 1) % validation_frequency == 0:
                # Compute zero-one loss on the validation set.
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # If we got the best validation score so far...
                if this_validation_loss < best_validation_loss:

                    # ...improve patience if the loss improvement is good enough.
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # Save the best validation score and iteration number.
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # Test it on the test set.
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The code for file ran for %.2fm' % ((end_time - start_time) / 60.))
    # train_set = numpy.asarray(train_set)
    # valid_set = numpy.asarray(valid_set)
    # print(numpy.shape(train_set))
    # print(numpy.shape(valid_set))
    # train_set.append(valid_set)
    # print(train_set.shape())
    # train_set_x, train_set_y = shared_dataset(train_set)
    # n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    # n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    # n_train_batches //= batch_size
    '''