Example #1
def test_adversarial_example(learning_rate=0.01,
                             L1_reg=0.00,
                             L2_reg=0.0001,
                             n_epochs=100,
                             batch_size=128,
                             n_hidden=500,
                             n_hiddenLayers=3,
                             verbose=False,
                             smaller_set=False):
    """
    Wrapper function for testing adversarial examples
    """
    # load the dataset; download the dataset if it is not present
    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
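    # Note: floor division drops any incomplete final batch, so up to
    # batch_size - 1 trailing examples in each split are never used.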

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    # TODO: construct a neural network, either MLP or CNN.
    classifier = myMLP(rng=rng,
                       input=x,
                       n_in=32 * 32 * 3,
                       n_hidden=n_hidden,
                       n_out=10,
                       n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model, n_train_batches,
             n_valid_batches, n_test_batches, n_epochs, verbose)

    filter_model = theano.function(
        inputs=[index],
        outputs=[
            x, classifier.logRegressionLayer.y_pred, y,
            classifier.logRegressionLayer.p_y_given_x
        ],
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    filter_output = [filter_model(i) for i in range(n_test_batches)]

    sample_x = None
    sample_y = None
    test_output = None
    expected_distribution = None
    for i in filter_output:
        if numpy.array_equal(i[1], i[2]):
            sample_x = i[0]
            sample_y = i[1]
            expected_distribution = i[3]
            print("successfully classified sample ", sample_y)
            t_sample_x, t_sample_y = shared_dataset((sample_x, sample_y))
            grad_input = classifier.input + 0.1 * T.sgn(
                T.grad(cost, classifier.input))
            grad_input_fn = theano.function(inputs=[],
                                            outputs=grad_input,
                                            givens={
                                                x: t_sample_x,
                                                y: t_sample_y
                                            })
            gradient = grad_input_fn()
            new_t_sample_x, t_sample_y = shared_dataset((gradient, sample_y))
            testing_gradient = theano.function(
                inputs=[],
                outputs=[
                    y, classifier.logRegressionLayer.y_pred,
                    classifier.logRegressionLayer.p_y_given_x
                ],
                givens={
                    x: new_t_sample_x,
                    y: t_sample_y
                })
            test_output = testing_gradient()
            if not numpy.array_equal(test_output[0], test_output[1]):
                break

    return test_output, expected_distribution
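
# --- Illustration (not part of the original example) ---
# The adversarial step above is the fast gradient sign method:
#     x_adv = x + eps * sign(dJ/dx), with eps = 0.1 in this example.
# A minimal NumPy sketch of the same perturbation; the names `fgsm_perturb`,
# `grad_wrt_input`, and `eps` are illustrative only and not part of the code
# above.
import numpy as np


def fgsm_perturb(x, grad_wrt_input, eps=0.1):
    """Return an adversarially perturbed copy of x (assumes x is in [0, 1])."""
    x_adv = x + eps * np.sign(grad_wrt_input)
    return np.clip(x_adv, 0.0, 1.0)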
Example #2
def test_data_augmentation(learning_rate=0.01,
                           L1_reg=0.00,
                           L2_reg=0.0001,
                           n_epochs=100,
                           batch_size=128,
                           n_hidden=500,
                           n_hiddenLayers=3,
                           verbose=False):
    """
    Wrapper function for experiment of data augmentation

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: whether to print out the epoch summary.

    """
    rng = numpy.random.RandomState(23455)

    # Load down-sampled dataset in raw format (numpy.ndarray, not Theano shared)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix), where each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (vector) that has the same length as the number of rows in the input.

    # Load the smaller dataset in raw format, since we need to preprocess it
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    # Repeat the training set 5 times
    train_set[1] = numpy.tile(train_set[1], 5)

    # TODO: translate the dataset
    train_set_x_u = translate_image(train_set[0], "w")
    train_set_x_d = translate_image(train_set[0], "s")
    train_set_x_r = translate_image(train_set[0], "d")
    train_set_x_l = translate_image(train_set[0], "a")

    # Stack the original dataset and the synthesized datasets
    train_set[0] = numpy.vstack((train_set[0], train_set_x_u, train_set_x_d,
                                 train_set_x_r, train_set_x_l))

    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(rng=rng,
                       input=x,
                       n_in=32 * 32 * 3,
                       n_hidden=n_hidden,
                       n_hiddenLayers=n_hiddenLayers,
                       n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    output = train_nn(train_model, validate_model, test_model, n_train_batches,
                      n_valid_batches, n_test_batches, n_epochs, verbose)
    return output
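
# --- Illustration (not part of the original example) ---
# translate_image is assumed to be defined elsewhere in this project; the
# "w"/"s"/"a"/"d" codes above suggest up/down/left/right shifts. A minimal
# NumPy sketch of such a shift over flattened CIFAR-10 rows of shape
# (N, 3 * 32 * 32); the name `translate_image_sketch` is illustrative only.
import numpy as np


def translate_image_sketch(flat_images, direction, shift=1):
    """Shift each 32x32x3 image by `shift` pixels; vacated pixels become 0."""
    imgs = flat_images.reshape(-1, 3, 32, 32)
    axis = 2 if direction in ("w", "s") else 3  # rows for up/down, columns for left/right
    step = -shift if direction in ("w", "a") else shift
    shifted = np.roll(imgs, step, axis=axis)
    # zero out the border that np.roll would otherwise wrap around
    if direction == "w":
        shifted[:, :, -shift:, :] = 0
    elif direction == "s":
        shifted[:, :, :shift, :] = 0
    elif direction == "a":
        shifted[:, :, :, -shift:] = 0
    else:  # "d"
        shifted[:, :, :, :shift] = 0
    return shifted.reshape(flat_images.shape[0], -1)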
Example #3
def MY_lenet(learning_rate=0.1,
             n_epochs=200,
             nkerns=[20, 50],
             batch_size=500,
             L1_reg=0.00,
             L2_reg=0.0001):

    rng = numpy.random.RandomState(23455)

    ds_rate = None
    datasets = load_data(ds_rate=ds_rate, theano_shared=False)

    train_set_x, train_set_y = datasets[0]
    train_size = train_set_x.shape
    n_train = train_size[0]
    '''
    print '... Translating images'
    train_set_x_tran = np.empty(train_size)      
    for i in range(n_train):
        img = (np.reshape(train_set_x[i],(3,32,32))).transpose(1,2,0)
        img_tran = translate_image(img)
        train_set_x_tran[i] = np.reshape(img_tran.transpose(2,0,1),(3*32*32))

    print '... Rotating images'
    train_set_x_rota = np.empty(train_size)     
    for i in range(n_train):
        img = (np.reshape(train_set_x[i],(3,32,32))).transpose(1,2,0)
        img_tran = rotate_image(img)
        train_set_x_rota[i] = np.reshape(img_tran.transpose(2,0,1),(3*32*32))
    '''
    print('... Flipping images')
    train_set_x_flip = np.empty(train_size)
    for i in range(n_train):
        img = (np.reshape(train_set_x[i], (3, 32, 32))).transpose(1, 2, 0)
        img_tran = flip_image(img)
        train_set_x_flip[i] = np.reshape(img_tran.transpose(2, 0, 1),
                                         (3 * 32 * 32))
    '''
    print '... Ennoising images'
    train_set_x_nois = np.empty(train_size)
    for i in range(n_train):
        img = (np.reshape(train_set_x[i],(3,32,32))).transpose(1,2,0)
        img_tran = noise_injection(img)
        train_set_x_aug[i] = np.reshape(img_tran.transpose(2,0,1),(3*32*32))
    '''

    train_set_x = np.concatenate(
        (
            train_set_x,
            #train_set_x_tran,
            #train_set_x_rota,
            train_set_x_flip),
        axis=0)
    train_set_y = np.concatenate(
        (
            train_set_y,
            #train_set_y,
            #train_set_y,
            train_set_y),
        axis=0)

    datasets[0] = [train_set_x, train_set_y]

    train_set_x, train_set_y = shared_dataset(datasets[0])
    valid_set_x, valid_set_y = shared_dataset(datasets[1])
    test_set_x, test_set_y = shared_dataset(datasets[2])

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels
    training_enabled = T.iscalar(
        'training_enabled'
    )  # pseudo boolean for switching between training and prediction

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (32, 32) is the size of the CIFAR-10 images (3 color channels).
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (32-3+1, 32-3+1) = (30, 30)
    # maxpooling reduces this further to (30/2, 30/2) = (15, 15)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 15, 15)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 3, 32, 32),
                                filter_shape=(nkerns[0], 3, 3, 3),
                                poolsize=(2, 2))
    #print 'layer0.output.shape ='
    #print layer0.output.shape.eval({x: np.random.rand(2,2).astype(dtype=theano.config.floatX)})

    layerbn = BatchNormalization(input_shape=(batch_size, nkerns[0], 15, 15),
                                 mode=1,
                                 momentum=0.9)
    layerbn_output = layerbn.get_result(layer0.output)
    #print 'layerbn_output.shape ='
    #print layerbn_output.shape.eval({x: np.random.rand(2,2).astype(dtype=theano.config.floatX)})

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (15-3+1, 15-3+1) = (13, 13)
    # maxpooling (with ignore_border) reduces this further to (6, 6)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 6, 6)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layerbn_output,
                                image_shape=(batch_size, nkerns[0], 15, 15),
                                filter_shape=(nkerns[1], nkerns[0], 3, 3),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 6 * 6),
    # or (500, 50 * 6 * 6) = (500, 1800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected ReLU layer with dropout
    layer2 = DropoutHiddenLayer(rng,
                                is_train=training_enabled,
                                input=layer2_input,
                                n_in=nkerns[1] * 6 * 6,
                                n_out=4096,
                                activation=T.nnet.relu)

    # construct a fully-connected ReLU layer with dropout
    layer3 = DropoutHiddenLayer(rng,
                                is_train=training_enabled,
                                input=layer2.output,
                                n_in=4096,
                                n_out=2048,
                                activation=T.nnet.relu)

    # construct a fully-connected ReLU layer with dropout
    layer4 = DropoutHiddenLayer(rng,
                                is_train=training_enabled,
                                input=layer3.output,
                                n_in=2048,
                                n_out=1024,
                                activation=T.nnet.relu)

    # construct a fully-connected ReLU layer with dropout
    layer5 = DropoutHiddenLayer(rng,
                                is_train=training_enabled,
                                input=layer4.output,
                                n_in=1024,
                                n_out=512,
                                activation=T.nnet.relu)

    # classify the values of the last fully-connected layer
    layer6 = LogisticRegression(input=layer5.output, n_in=512, n_out=10)

    # L1 norm ; one regularization option is to enforce L1 norm to
    # be small
    L1 = (abs(layer2.W).sum() + abs(layer3.W).sum() + abs(layer4.W).sum() +
          abs(layer5.W).sum() + abs(layer6.W).sum())

    # square of L2 norm ; one regularization option is to enforce
    # square of L2 norm to be small
    L2_sqr = ((layer2.W**2).sum() + (layer3.W**2).sum() + (layer4.W**2).sum() +
              (layer5.W**2).sum() + (layer6.W**2).sum())

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (layer6.negative_log_likelihood(y) + L1_reg * L1 + L2_reg * L2_sqr)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer6.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    validate_model = theano.function(
        [index],
        layer6.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    # create a list of all model parameters to be fit by gradient descent
    params = (layer6.params + layer5.params + layer4.params + layer3.params +
              layer2.params + layer1.params + layer0.params)
    '''
    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
    '''

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    momentum = theano.shared(numpy.cast[theano.config.floatX](0.5),
                             name='momentum')
    updates = []
    for param in params:
        param_update = theano.shared(param.get_value() *
                                     numpy.cast[theano.config.floatX](0.))
        updates.append((param, param - learning_rate * param_update))
        updates.append((param_update, momentum * param_update +
                        (numpy.cast[theano.config.floatX](1.) - momentum) *
                        T.grad(cost, param)))
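    # Note: every right-hand side in a Theano `updates` list is evaluated with
    # the *old* shared values, so the two pairs above implement
    #     v_new = momentum * v + (1 - momentum) * grad
    #     p_new = p - learning_rate * v
    # i.e. the parameter step uses the velocity from the previous iteration.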

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](1)
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    train_nn(train_model,
             validate_model,
             test_model,
             n_train_batches,
             n_valid_batches,
             n_test_batches,
             n_epochs,
             verbose=True)
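
# --- Illustration (not part of the original example) ---
# The hidden-layer input size in MY_lenet (nkerns[1] * 6 * 6) follows from
# valid-mode convolution plus non-overlapping 2x2 max-pooling. A small helper
# that reproduces the arithmetic; `conv_pool_out` is illustrative only.
def conv_pool_out(size, filter_size, pool=2):
    """Spatial size after a valid conv followed by 2x2 pooling (ignore_border)."""
    return (size - filter_size + 1) // pool


assert conv_pool_out(32, 3) == 15                      # after layer0
assert conv_pool_out(conv_pool_out(32, 3), 3) == 6     # after layer1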
def test_data_augmentation(learning_rate=0.01,
             L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
             batch_size=128, n_hidden=500, n_hiddenLayers=3,
             verbose=False):
    """
    Wrapper function for experiment of data augmentation

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: whether to print out the epoch summary.

    """
    rng = numpy.random.RandomState(23455)

    # Load down-sampled dataset in raw format (numpy.ndarray, not Theano shared)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix), where each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (vector) that has the same length as the number of rows in the input.

    # Load the smaller dataset in raw format, since we need to preprocess it
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    # Repeat the training set 5 times
    train_set[1] = numpy.tile(train_set[1], 5)

    # TODO: translate the dataset
    train_set_x_u = translate_image(train_set[0], 1)
    train_set_x_d = translate_image(train_set[0], 2)
    train_set_x_r = translate_image(train_set[0], 3)
    train_set_x_l = translate_image(train_set[0], 4)

    # Stack the original dataset and the synthesized datasets
    train_set[0] = numpy.vstack((train_set[0],
                       train_set_x_u,
                       train_set_x_d,
                       train_set_x_r,
                       train_set_x_l))

    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
def test_mynet(learning_rate=0.001, n_epochs=80,
               dataset='mnist.pkl.gz',
               nkerns=[20, 50], batch_size=100,
               flip_p=0, rotate_p=0, translate_p=0, noise_p=0):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training/testing (not used
                    here; the CIFAR-10 data is downloaded below)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)
    
    ds_rate = None
    ''' Loads the CIFAR-10 dataset

    :type ds_rate: float
    :param ds_rate: downsample rate; should be larger than 1, if provided.

    :type theano_shared: boolean
    :param theano_shared: If true, the function returns the dataset as Theano
    shared variables. Otherwise, the function returns raw data.
    '''
    if ds_rate is not None:
        assert(ds_rate > 1.)

    # Download the CIFAR-10 dataset if it is not present
    def check_dataset(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data",
            dataset
        )
        #f_name = new_path.replace("src/../data/%s"%dataset, "data/") 
        f_name = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data"
        )
        if (not os.path.isfile(new_path)):
            from six.moves import urllib
            origin = (
                'https://www.cs.toronto.edu/~kriz/' + dataset
            )
            print('Downloading data from %s' % origin)
            urllib.request.urlretrieve(origin, new_path) 
             
        tar = tarfile.open(new_path)
        file_names = tar.getnames()
        for file_name in file_names:
            tar.extract(file_name,f_name)
        tar.close()              
        
        return f_name
    
    f_name = check_dataset('cifar-10-matlab.tar.gz')

    # Load data and convert data format
    train_batches = ['data_batch_1.mat', 'data_batch_2.mat', 'data_batch_3.mat',
                     'data_batch_4.mat', 'data_batch_5.mat']
    train_batch = os.path.join(f_name, 'cifar-10-batches-mat', train_batches[0])
    train_set = scipy.io.loadmat(train_batch)
    train_set['data'] = train_set['data'] / 255.
    for i in range(4):
        train_batch = os.path.join(f_name, 'cifar-10-batches-mat',
                                   train_batches[i + 1])
        temp = scipy.io.loadmat(train_batch)
        train_set['data'] = numpy.concatenate(
            (train_set['data'], temp['data'] / 255.), axis=0)
        train_set['labels'] = numpy.concatenate(
            (train_set['labels'].flatten(), temp['labels'].flatten()), axis=0)

    test_batches = os.path.join(f_name, 'cifar-10-batches-mat/test_batch.mat')
    test_set = scipy.io.loadmat(test_batches)
    test_set['data'] = test_set['data'] / 255.
    test_set['labels'] = test_set['labels'].flatten()

    train_set = (train_set['data'], train_set['labels'])
    test_set = (test_set['data'], test_set['labels'])
    

    # Downsample the training dataset if specified
    train_set_len = len(train_set[1])
    if ds_rate is not None:
        train_set_len = int(train_set_len // ds_rate)
        train_set = [x[:train_set_len] for x in train_set]

    # Extract validation dataset from train dataset
    valid_set = [x[-(train_set_len//5):] for x in train_set]
    train_set = [x[:-(train_set_len//5)] for x in train_set]

    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix)
    # where each row corresponds to an example. target is a
    # numpy.ndarray of 1 dimension (vector) that has the same length as
    # the number of rows in the input. It should give the target
    # to the example with the same index in the input.
    theano_shared=True
    if theano_shared:
        test_set_x, test_set_y = shared_dataset(test_set)
        valid_set_x, valid_set_y = shared_dataset(valid_set)
        train_set_x, train_set_y = shared_dataset(train_set)

        rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    else:
        rval = [train_set, valid_set, test_set]

    #return rval
    datasets = rval

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    training_enabled = T.iscalar('training_enabled')
    # start-snippet-1
    mydata = T.matrix('mydata')
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (32, 32) is the size of the CIFAR-10 images (3 color channels).
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional layer:
    # 3x3 filters applied to the (batch_size, 3, 32, 32) input.
    print(layer0_input.shape)

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(128, 3, 3, 3)
    )

    print('layer 0 constructed....')
    print(layer0.output)

    layer01 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, 128, 32, 32),
        filter_shape=(128, 128, 3, 3)
    )

    layer02 = LeNetConvPoolLayer(
        rng,
        input=layer01.output,
        image_shape=(batch_size, 128, 32, 32),
        filter_shape=(32, 128, 3, 3)
    )
    '''
    layer03 = LeNetConvPoolLayer(
        rng,
        input=layer02.output,
        image_shape=(batch_size, 32, 32, 32),
        filter_shape=(32, 32, 5, 5)
        
    )
    layer04 = LeNetConvPoolLayer(
        rng,
        input=layer03.output,
        image_shape=(batch_size, 32, 32, 32),
        filter_shape=(32, 32, 5, 5)
        
    )
    layer05 = LeNetConvPoolLayer(
        rng,
        input=layer04.output,
        image_shape=(batch_size, 32, 32, 32),
        filter_shape=(32, 32, 5, 5)
       
    )
    '''
    layer06 = theano.tensor.signal.pool.pool_2d(layer02.output, (2, 2),
                                                ignore_border=True)

    print('layer 01 constructed....')
    print(layer01)

    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer06,
        image_shape=(batch_size, 32, 16, 16),
        filter_shape=(32, 32, 3, 3)
    )
    
    layer2 = theano.tensor.signal.pool.pool_2d(layer1.output, (2, 2),
                                               ignore_border=True)

    layer3 = LeNetConvPoolLayer(rng,
                                input=layer2,
                                image_shape=(batch_size, 32, 8, 8),
                                filter_shape=(32, 32, 3, 3),
                                poolsize=(1, 1))

    layer4 = theano.tensor.signal.pool.pool_2d(layer3.output, (2, 2),
                                               ignore_border=True)
    

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # Here the flattened layer4 output is expected to have 32 * 4 * 4 = 512
    # features per example, matching n_in of layer5 below.
    layer5_input = layer4.flatten(2)

    # construct a fully-connected ReLU layer with dropout
    layer5 = DropoutHiddenLayer(
        is_train=training_enabled,
        rng=rng,
        input=layer5_input,
        n_in=32 * 4 * 4,
        n_out=4096,
        W=None,
        b=None,
        activation=theano.tensor.nnet.relu,
        p=0.7
    )

    # construct a fully-connected ReLU layer with dropout
    layer6 = DropoutHiddenLayer(
        is_train= training_enabled,
        rng=rng,
        input=layer5.output,
        n_in=4096,
        n_out=512,
        W=None,
        b=None,
        activation=theano.tensor.nnet.relu,
        p=0.7
    )
    layer62 = DropoutHiddenLayer(
        is_train=training_enabled,
        rng=rng,
        input=layer6.output,
        n_in=512,
        n_out=512,
        W=None,
        b=None,
        activation=theano.tensor.nnet.relu,
        p=0.7
    )
    L2_reg=0.0001
    
    # classify the values of the last fully-connected layer
    layer7 = LogisticRegression(input=layer62.output, n_in=512, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer7.negative_log_likelihood(y)

    L2_sqr = (layer7.W ** 2).sum()
    # L2_sqr is computed but not added to the cost here
    cost = cost  # + L2_sqr
    
    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer7.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        }
    )

    validate_model = theano.function(
        [index],
        layer7.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (layer62.params + layer6.params + layer7.params + layer5.params +
              layer3.params + layer1.params + layer0.params + layer01.params)
    # + layer02.params + layer03.params + layer04.params + layer05.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    
   
    
    """
The MIT License (MIT)

Copyright (c) 2015 Alec Radford

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""    

    def Adam(cost, params, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
        updates = []
        grads = T.grad(cost, params)
        i = theano.shared(numpy.asarray(0., dtype=theano.config.floatX))
        i_t = i + 1.
        fix1 = 1. - (1. - b1)**i_t
        fix2 = 1. - (1. - b2)**i_t
        lr_t = lr * (T.sqrt(fix2) / fix1)
        for p, g in zip(params, grads):
            m = theano.shared(p.get_value() * 0.)
            v = theano.shared(p.get_value() * 0.)
            m_t = (b1 * g) + ((1. - b1) * m)
            v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
            g_t = m_t / (T.sqrt(v_t) + e)
            p_t = p - (lr_t * g_t)
            updates.append((m, m_t))
            updates.append((v, v_t))
            updates.append((p, p_t))
        updates.append((i, i_t))
        return updates
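    # Note: this Adam variant (Alec Radford's) takes b1 and b2 as (1 - beta1)
    # and (1 - beta2) from the Adam paper, so fix1 and fix2 are the usual
    # bias-correction terms 1 - beta1**t and 1 - beta2**t, folded into the
    # effective step size lr_t.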

    updates = Adam(cost, params)
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](1)
        }
        
    )
    train_model_augmented = theano.function(
        [mydata, index],
        cost,
        updates=updates,
        givens={
            x: mydata,
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](1)
        }
    )
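    # Note: `mydata` feeds x directly, while y is still sliced from the shared
    # label tensor, so the augmented minibatch passed in must keep the same
    # ordering and batch_size as the corresponding slice of train_set_x.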
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (not done_looping):
        epoch = epoch + 1
        if epoch >= 50:  # hard cap on epochs; the n_epochs argument is not used here
            break
        '''
        train_set[0] = noise_image(train_set[0], 0.05)
        train_set[0] = flip_image(train_set[0],0.5)
        train_set[0] = rotate_image(train_set[0],0.15)
        train_set[0] = translate_image(train_set[0],0.15)
        
        train_set_x, train_set_y = shared_dataset(train_set)
        '''
        for minibatch_index in range(n_train_batches):
            
            iter = (epoch - 1) * n_train_batches + minibatch_index
        
            if iter % 100 == 0:
                print('training @ iter = ', iter)
            
            temp_data = train_set_x.get_value()
            data = temp_data[minibatch_index * batch_size:
                             (minibatch_index + 1) * batch_size]
            data = noise_image(data, 0.05)
            data = rotate_image(data, 0.15)
            data = translate_image(data, 0.25)
            data = flip_image(data, 0.5)
            #mydata.set_value(data)
            cost_ij = train_model_augmented(data, minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The code ran for %.2fm' % ((end_time - start_time) / 60.))
    #train_set = numpy.asarray(train_set)
    #valid_set = numpy.asarray(valid_set)
    #print(numpy.shape(train_set))
    #print(numpy.shape(valid_set))
    #train_set.append(valid_set)
    #print(train_set.shape())
    #train_set_x, train_set_y = shared_dataset(train_set)
    #n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    #n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    #n_train_batches //= batch_size