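# NOTE: the listings below assume the imports collected here. numpy, theano
# and the standard library are required; datapy, Pegasos, optimizer,
# nonlinearity, ConvMaxPool, FullyConnected and color are project-local
# modules referenced by the code (assumed importable as written, not part of
# any standard library).
import os
import sys
import time

import numpy as np
import theano
import theano.tensor as T

import ConvMaxPool
import FullyConnected
import Pegasos
import color
import datapy
import nonlinearity
import optimizer
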
def svm_cva(learning_rate=3e-4, n_epochs=10000, dirs=None, batch_size=500):
    print learning_rate, batch_size

    datasets = datapy.load_data_svhn_features(dirs, have_matrix=True)
    train_set_x, train_set_y, train_y_matrix = datasets[0]
    test_set_x, test_set_y, test_y_matrix = datasets[1]
    valid_set_x, valid_set_y, valid_y_matrix = datasets[2]

    #datasets = datapy.load_data_svhn(dataset, have_matrix=False)
    #train_set_x, train_set_y = datasets[0]
    #test_set_x, test_set_y = datasets[1]
    #valid_set_x, valid_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels
    '''
    Differences
    '''

    y_matrix = T.imatrix(
        'y_matrix')  # labels, presented as 2D matrix of int labels

    # construct the Pegasos SVM classifier
    # each input is a 4*4*96-dimensional SVHN feature vector
    rng = np.random.RandomState(0)

    classifier = Pegasos.Pegasos(input=x,
                                 rng=rng,
                                 n_in=4 * 4 * 96,
                                 n_out=10,
                                 weight_decay=2e-6,
                                 loss=10)

    # the cost we minimize during training is the Pegasos (hinge-loss) SVM
    # objective of the model in symbolic format
    cost = classifier.objective(10, y, y_matrix)
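    # (Assumed reading of Pegasos.objective, not verified against Pegasos.py:
    # an L2 penalty of roughly weight_decay/2 * ||W||^2 plus the average
    # multiclass hinge loss over the minibatch, computed from y / y_matrix,
    # with 10 passed in as the number of classes.)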

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            #y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    params = [classifier.W, classifier.b]
    grads = [g_W, g_b]

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.

    l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
    #get_optimizer = optimizer.get_simple_optimizer(learning_rate=learning_rate)
    get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r,
                                                     decay1=0.1,
                                                     decay2=0.001)
    updates = get_optimizer(params, grads)

    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=[cost],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            y_matrix:
            train_y_matrix[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 50000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = 200
    # go through this many
    # minibatches before checking the network
    # on the validation set; here we check
    # every 200 minibatches

    best_validation_loss = np.inf
    best_test_score = np.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            #print minibatch_avg_cost
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)

                this_test_losses = [
                    test_model(i) for i in xrange(n_test_batches)
                ]
                this_test_score = np.mean(this_test_losses)

                if this_test_score < best_test_score:
                    best_test_score = this_test_score

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%'
                    % (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100, this_test_score * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of'
                           ' best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
    print best_test_score
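
# A minimal usage sketch for the SVHN-feature variant above (the feature
# directory is hypothetical; it must be readable by
# datapy.load_data_svhn_features):
#
# svm_cva(learning_rate=3e-4, n_epochs=10000,
#         dirs='features/svhn/', batch_size=500)
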
def svm_cva(dir, start=0, end=500, learning_rate=3e-4, n_epochs=10000,
            dataset='./data/mnist.pkl.gz',
            batch_size=500):
    """
    Demonstrate stochastic gradient (Adam) optimization of a linear Pegasos
    SVM on pre-extracted features

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """

    ''' 
    Difference
    '''
    print start, end, learning_rate, batch_size

    datasets = datapy.load_data_gpu(dataset, have_matrix=True)

    _, train_set_y, train_y_matrix = datasets[0]
    _, valid_set_y, valid_y_matrix = datasets[1]
    _, test_set_y, test_y_matrix = datasets[2]

    train_set_x, valid_set_x, test_set_x = datapy.load_feature_gpu(dir=dir, start=start,end=end)

    print train_set_x.get_value().shape
    print valid_set_x.get_value().shape
    print test_set_x.get_value().shape

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    '''
    Differences
    '''

    y_matrix = T.imatrix('y_matrix') # labels, presented as 2D matrix of int labels 

    # construct the Pegasos SVM classifier
    # each input is an (end - start)-dimensional feature vector
    rng = np.random.RandomState(0)
    n_in=end-start
    classifier = Pegasos.Pegasos(input=x, rng=rng, n_in=n_in, n_out=10,  weight_decay=1e-4, loss=1)

    # the cost we minimize during training is the Pegasos (hinge-loss) SVM
    # objective of the model in symbolic format
    cost = classifier.objective(10, y, y_matrix)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size],
            #y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    params = [classifier.W, classifier.b]
    grads = [g_W, g_b]

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    
    l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
    #get_optimizer = optimizer.get_simple_optimizer(learning_rate=learning_rate)
    get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r, decay1 = 0.1, decay2 = 0.001)
    updates = get_optimizer(params,grads)
    

    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=[cost],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                                  # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                  # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = np.inf
    best_test_score = np.inf
    test_score = 0.
    start_time = time.clock()

    logdir = dir + str(learning_rate)+'_c-'

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            #print minibatch_avg_cost
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)

                this_test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                this_test_score = np.mean(this_test_losses)

                if this_test_score < best_test_score:
                    best_test_score = this_test_score

                with open(logdir+'hook.txt', 'a') as f:
                    print >>f, (
                        'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%' %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            this_validation_loss * 100,
                            this_test_score *100.
                        )
                    )
                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100,
                        this_test_score *100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)
                    with open(logdir+'hook.txt', 'a') as f:
                        print >>f,(
                            (
                                '     epoch %i, minibatch %i/%i, test error of'
                                ' best model %f %%'
                            ) %
                            (
                                epoch,
                                minibatch_index + 1,
                                n_train_batches,
                                test_score * 100.
                            )
                        )

                    print(
                        (
                            '     epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    with open(logdir+'hook.txt', 'a') as f:
        print>>f,(
            (
                'Optimization complete with best validation score of %f %%,'
                'with test performance %f %%'
            )
            % (best_validation_loss * 100., test_score * 100.)
        )
        print>>f, 'The code ran for %d epochs, with %f epochs/sec' % (
            epoch, 1. * epoch / (end_time - start_time))
        print>>f, ('The code for file ' +
                   os.path.split(__file__)[1] +
                   ' ran for %.1fs' % ((end_time - start_time)))
        print>>f, best_test_score

    print(
        (
            'Optimization complete with best validation score of %f %%,'
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
    print best_test_score
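
# A minimal usage sketch for the feature-slice variant above (the directory is
# hypothetical; start/end select a slice of the stored feature dimensions via
# datapy.load_feature_gpu):
#
# svm_cva(dir='features/mnist_cva/', start=0, end=500,
#         learning_rate=3e-4, batch_size=500)
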
def deep_cnn_6layer_mnist_50000(learning_rate=3e-4,
            n_epochs=250,
            dataset='mnist.pkl.gz',
            batch_size=500,
            dropout_flag=0,
            seed=0,
            activation=None):
    
    #cp->cd->cpd->cd->c
    nkerns=[32, 32, 64, 64, 64]
    drops=[1, 0, 1, 0, 0]
    #skerns=[5, 3, 3, 3, 3]
    #pools=[2, 1, 1, 2, 1]
    #modes=['same']*5
    n_hidden=[500]

    
    logdir = 'results/supervised/cnn/mnist/deep_cnn_6layer_50000_'+str(nkerns)+str(drops)+str(n_hidden)+'_'+str(learning_rate)+'_'+str(int(time.time()))+'/'
    if dropout_flag==1:
        logdir = 'results/supervised/cnn/mnist/deep_cnn_6layer_50000_'+str(nkerns)+str(drops)+str(n_hidden)+'_'+str(learning_rate)+'_dropout_'+str(int(time.time()))+'/'
    if not os.path.exists(logdir): os.makedirs(logdir)
    print 'logdir:', logdir
    print 'deep_cnn_6layer_mnist_50000_', nkerns, n_hidden, drops, seed, dropout_flag
    with open(logdir+'hook.txt', 'a') as f:
        print >>f, 'logdir:', logdir
        print >>f, 'deep_cnn_6layer_mnist_50000_', nkerns, n_hidden, drops, seed, dropout_flag

    rng = np.random.RandomState(0)
    rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)
    '''
    '''
    datasets = datapy.load_data_gpu_60000(dataset, have_matrix=True)

    train_set_x, train_set_y, train_y_matrix = datasets[0]
    valid_set_x, valid_set_y, valid_y_matrix = datasets[1]
    test_set_x, test_set_y, test_y_matrix = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels
    '''
    dropout
    '''
    drop = T.iscalar('drop')

    y_matrix = T.imatrix('y_matrix') # labels, presented as 2D matrix of int labels 

    print '... building the model'

    layer0_input = x.reshape((batch_size, 1, 28, 28))
    
    if activation =='nonlinearity.relu':
        activation = nonlinearity.relu
    elif activation =='nonlinearity.tanh':
        activation = nonlinearity.tanh
    elif activation =='nonlinearity.softplus':
        activation = nonlinearity.softplus
    
    recg_layer = []
    cnn_output = []

    #1
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2),
        border_mode='valid', 
        activation=activation
    ))
    if drops[0]==1:
        cnn_output.append(recg_layer[-1].drop_output(layer0_input, drop=drop, rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(layer0_input))

    #2
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 3, 3),
        poolsize=(1, 1),
        border_mode='same', 
        activation=activation
    ))
    if drops[1]==1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))
    #3
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[1], 12, 12),
        filter_shape=(nkerns[2], nkerns[1], 3, 3),
        poolsize=(2, 2),
        border_mode='valid', 
        activation=activation
    ))
    if drops[2]==1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    #4
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[2], 5, 5),
        filter_shape=(nkerns[3], nkerns[2], 3, 3),
        poolsize=(1, 1),
        border_mode='same', 
        activation=activation
    ))
    if drops[3]==1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))
    #5
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[3], 5, 5),
        filter_shape=(nkerns[4], nkerns[3], 3, 3),
        poolsize=(1, 1),
        border_mode='same', 
        activation=activation
    ))
    if drops[4]==1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    mlp_input = cnn_output[-1].flatten(2)

    recg_layer.append(FullyConnected.FullyConnected(
        rng=rng,
        n_in=nkerns[4] * 5 * 5,
        n_out=500,
        activation=activation
    ))

    feature = recg_layer[-1].drop_output(mlp_input, drop=drop, rng=rng_share)

    # classify the values of the fully-connected sigmoidal layer
    classifier = Pegasos.Pegasos(input=feature, rng=rng, n_in=500, n_out=10, weight_decay=0, loss=1)

    # the cost we minimize during training is the hinge loss of the model
    cost = classifier.hinge_loss(10, y, y_matrix) * batch_size
    weight_decay=1.0/n_train_batches

    # create a list of all model parameters to be fit by gradient descent
    params=[]
    for r in recg_layer:
        params+=r.params
    params += classifier.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)
    l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
    get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r, decay1 = 0.1, decay2 = 0.001, weight_decay=weight_decay)
    updates = get_optimizer(params,grads)

    '''
    Save parameters and activations
    '''

    parameters = theano.function(
        inputs=[],
        outputs=params,
    )

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        }
    )

    train_model_average = theano.function(
        inputs=[index],
        outputs=[cost, classifier.errors(y)],
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](dropout_flag)
        }
    )

    train_model = theano.function(
        inputs=[index],
        outputs=[cost, classifier.errors(y)],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](dropout_flag)
        }
    )

    print '... training'
    # early-stopping parameters
    patience = n_train_batches * 100  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = np.inf
    best_test_score = np.inf
    test_score = 0.
    start_time = time.clock()
    epoch = 0
    decay_epochs = 150

    while (epoch < n_epochs):
        epoch = epoch + 1
        tmp1 = time.clock()

        minibatch_avg_cost = 0
        train_error = 0

        for minibatch_index in xrange(n_train_batches):

            co, te = train_model(minibatch_index)
            minibatch_avg_cost+=co
            train_error+=te
            #print minibatch_avg_cost
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:

                test_epoch = epoch - decay_epochs
                if test_epoch > 0 and test_epoch % 10 == 0:
                    print l_r.get_value()
                    with open(logdir+'hook.txt', 'a') as f:
                        print >>f,l_r.get_value()
                    l_r.set_value(np.cast['float32'](l_r.get_value()/3.0))

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)

                this_test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                this_test_score = np.mean(this_test_losses)

                train_thing = [train_model_average(i) for i in xrange(n_train_batches)]
                train_thing = np.mean(train_thing, axis=0)
                        
                print epoch, 'hinge loss and training error', train_thing
                with open(logdir+'hook.txt', 'a') as f:
                    print >>f, epoch, 'hinge loss and training error', train_thing

                if this_test_score < best_test_score:
                    best_test_score = this_test_score

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100,
                        this_test_score *100.
                    )
                )
                with open(logdir+'hook.txt', 'a') as f:
                    print >>f, (
                        'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%' %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            this_validation_loss * 100,
                            this_test_score *100.
                        )
                    )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)

                    print(
                        (
                            '     epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )
                    with open(logdir+'hook.txt', 'a') as f:
                        print >>f, (
                            (
                                '     epoch %i, minibatch %i/%i, test error of'
                                ' best model %f %%'
                            ) %
                            (
                                epoch,
                                minibatch_index + 1,
                                n_train_batches,
                                test_score * 100.
                            )
                        )
        
        if epoch%50==0:
            model = parameters()
            for i in xrange(len(model)):
                model[i] = np.asarray(model[i]).astype(np.float32)
            np.savez(logdir+'model-'+str(epoch), model=model)

        print 'hinge loss and training error', minibatch_avg_cost / float(n_train_batches), train_error / float(n_train_batches)
        print 'time', time.clock() - tmp1
        with open(logdir+'hook.txt', 'a') as f:
            print >>f,'hinge loss and training error', minibatch_avg_cost / float(n_train_batches), train_error / float(n_train_batches)
            print >>f,'time', time.clock() - tmp1

    end_time = time.clock()
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
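
# A minimal usage sketch (activation is passed as a string and mapped onto the
# project's nonlinearity module inside the function; dropout_flag=1 enables
# dropout on the layers marked in drops):
#
# deep_cnn_6layer_mnist_50000(learning_rate=3e-4, n_epochs=250,
#                             dropout_flag=1, seed=0,
#                             activation='nonlinearity.relu')
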
def c_6layer_mnist_imputation(seed=0,
                              pertub_type=3,
                              pertub_prob=6,
                              pertub_prob1=14,
                              predir=None,
                              n_batch=144,
                              dataset='mnist.pkl.gz',
                              batch_size=500):
    """
    Missing data imputation
    """
    #cp->cd->cpd->cd->c
    nkerns = [32, 32, 64, 64, 64]
    drops = [0, 0, 0, 0, 0, 1]
    #skerns=[5, 3, 3, 3, 3]
    #pools=[2, 1, 1, 2, 1]
    #modes=['same']*5
    n_hidden = [500, 50]
    drop_inverses = [
        1,
    ]
    # 28->12->12->5->5/5*5*64->500->50->500->5*5*64/5->5->12->12->28

    if dataset == 'mnist.pkl.gz':
        dim_input = (28, 28)
        colorImg = False

    train_set_x, test_set_x, test_set_x_pertub, pertub_label, pertub_number = datapy.load_pertub_data(
        dirs='data_imputation/',
        pertub_type=pertub_type,
        pertub_prob=pertub_prob,
        pertub_prob1=pertub_prob1)

    datasets = datapy.load_data_gpu(dataset, have_matrix=True)

    _, train_set_y, train_y_matrix = datasets[0]
    valid_set_x, valid_set_y, valid_y_matrix = datasets[1]
    _, test_set_y, test_y_matrix = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    #x_pertub = T.matrix('x_pertub')  # the data is presented as rasterized images
    #p_label = T.matrix('p_label')

    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels
    y_matrix = T.imatrix('y_matrix')

    drop = T.iscalar('drop')
    drop_inverse = T.iscalar('drop_inverse')

    activation = nonlinearity.relu

    rng = np.random.RandomState(seed)
    rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)

    input_x = x.reshape((batch_size, 1, 28, 28))

    recg_layer = []
    cnn_output = []

    #1
    recg_layer.append(
        ConvMaxPool.ConvMaxPool(rng,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2),
                                border_mode='valid',
                                activation=activation))
    if drops[0] == 1:
        cnn_output.append(recg_layer[-1].drop_output(input=input_x,
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(input=input_x))

    #2
    recg_layer.append(
        ConvMaxPool.ConvMaxPool(rng,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 3, 3),
                                poolsize=(1, 1),
                                border_mode='same',
                                activation=activation))
    if drops[1] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    #3
    recg_layer.append(
        ConvMaxPool.ConvMaxPool(rng,
                                image_shape=(batch_size, nkerns[1], 12, 12),
                                filter_shape=(nkerns[2], nkerns[1], 3, 3),
                                poolsize=(2, 2),
                                border_mode='valid',
                                activation=activation))
    if drops[2] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    #4
    recg_layer.append(
        ConvMaxPool.ConvMaxPool(rng,
                                image_shape=(batch_size, nkerns[2], 5, 5),
                                filter_shape=(nkerns[3], nkerns[2], 3, 3),
                                poolsize=(1, 1),
                                border_mode='same',
                                activation=activation))
    if drops[3] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))
    #5
    recg_layer.append(
        ConvMaxPool.ConvMaxPool(rng,
                                image_shape=(batch_size, nkerns[3], 5, 5),
                                filter_shape=(nkerns[4], nkerns[3], 3, 3),
                                poolsize=(1, 1),
                                border_mode='same',
                                activation=activation))
    if drops[4] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    mlp_input = cnn_output[-1].flatten(2)

    recg_layer.append(
        FullyConnected.FullyConnected(rng=rng,
                                      n_in=nkerns[4] * 5 * 5,
                                      n_out=500,
                                      activation=activation))

    feature = recg_layer[-1].drop_output(mlp_input, drop=drop, rng=rng_share)

    # classify the values of the fully-connected sigmoidal layer
    classifier = Pegasos.Pegasos(input=feature,
                                 rng=rng,
                                 n_in=500,
                                 n_out=10,
                                 weight_decay=0,
                                 loss=1)

    # the cost we minimize during training is the hinge loss of the model
    cost = classifier.hinge_loss(10, y, y_matrix) * batch_size
    weight_decay = 1.0 / n_train_batches

    # create a list of all model parameters to be fit by gradient descent
    params = []
    for r in recg_layer:
        params += r.params
    params += classifier.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)
    learning_rate = 3e-4
    l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
    get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r,
                                                     decay1=0.1,
                                                     decay2=0.001,
                                                     weight_decay=weight_decay)
    updates = get_optimizer(params, grads)
    '''
    Save parameters and activations
    '''

    parameters = theano.function(
        inputs=[],
        outputs=params,
    )

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        })

    test_pertub_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x_pertub[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            #y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            drop: np.cast['int32'](0)
        })

    ##################
    # Pretrain MODEL #
    ##################

    model_epoch = 250
    if 'model_epoch' in os.environ:
        model_epoch = int(os.environ['model_epoch'])
    if predir is not None:
        color.printBlue('... setting parameters')
        color.printBlue(predir)
        if model_epoch == -1:
            pre_train = np.load(predir + 'best-model.npz')
        else:
            pre_train = np.load(predir + 'model-' + str(model_epoch) + '.npz')
        pre_train = pre_train['model']
        for (para, pre) in zip(params, pre_train):
            para.set_value(pre)
    else:
        sys.exit('c_6layer_mnist_imputation requires predir (pretrained parameters)')

    ###############
    # TRAIN MODEL #
    ###############
    valid_losses = [validate_model(i) for i in xrange(n_valid_batches)]
    valid_score = np.mean(valid_losses)

    test_losses = [test_model(i) for i in xrange(n_test_batches)]
    test_score = np.mean(test_losses)

    test_losses_pertub = [test_pertub_model(i) for i in xrange(n_test_batches)]
    test_score_pertub = np.mean(test_losses_pertub)

    print valid_score, test_score, test_score_pertub
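
# A minimal usage sketch (predir must contain parameter archives saved by the
# training script above, i.e. model-<epoch>.npz or best-model.npz; the path is
# hypothetical):
#
# c_6layer_mnist_imputation(seed=0, pertub_type=3, pertub_prob=6,
#                           pertub_prob1=14,
#                           predir='results/supervised/cnn/mnist/run/',
#                           batch_size=500)
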
def deep_cnn_6layer_mnist_50000(learning_rate=3e-4,
                                n_epochs=250,
                                dataset='mnist.pkl.gz',
                                batch_size=500,
                                dropout_flag=0,
                                seed=0,
                                activation=None):

    #cp->cd->cpd->cd->c
    nkerns = [32, 32, 64, 64, 64]
    drops = [1, 0, 1, 0, 0]
    #skerns=[5, 3, 3, 3, 3]
    #pools=[2, 1, 1, 2, 1]
    #modes=['same']*5
    n_hidden = [500]

    logdir = 'results/supervised/cnn/mnist/deep_cnn_6layer_50000_' + str(
        nkerns) + str(drops) + str(n_hidden) + '_' + str(
            learning_rate) + '_' + str(int(time.time())) + '/'
    if dropout_flag == 1:
        logdir = 'results/supervised/cnn/mnist/deep_cnn_6layer_50000_' + str(
            nkerns) + str(drops) + str(n_hidden) + '_' + str(
                learning_rate) + '_dropout_' + str(int(time.time())) + '/'
    if not os.path.exists(logdir): os.makedirs(logdir)
    print 'logdir:', logdir
    print 'deep_cnn_6layer_mnist_50000_', nkerns, n_hidden, drops, seed, dropout_flag
    with open(logdir + 'hook.txt', 'a') as f:
        print >> f, 'logdir:', logdir
        print >> f, 'deep_cnn_6layer_mnist_50000_', nkerns, n_hidden, drops, seed, dropout_flag

    rng = np.random.RandomState(0)
    rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)
    '''
    '''
    datasets = datapy.load_data_gpu_60000(dataset, have_matrix=True)

    train_set_x, train_set_y, train_y_matrix = datasets[0]
    valid_set_x, valid_set_y, valid_y_matrix = datasets[1]
    test_set_x, test_set_y, test_y_matrix = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels
    '''
    dropout
    '''
    drop = T.iscalar('drop')

    y_matrix = T.imatrix(
        'y_matrix')  # labels, presented as 2D matrix of int labels

    print '... building the model'

    layer0_input = x.reshape((batch_size, 1, 28, 28))

    if activation == 'nonlinearity.relu':
        activation = nonlinearity.relu
    elif activation == 'nonlinearity.tanh':
        activation = nonlinearity.tanh
    elif activation == 'nonlinearity.softplus':
        activation = nonlinearity.softplus

    recg_layer = []
    cnn_output = []

    #1
    recg_layer.append(
        ConvMaxPool.ConvMaxPool(rng,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2),
                                border_mode='valid',
                                activation=activation))
    if drops[0] == 1:
        cnn_output.append(recg_layer[-1].drop_output(layer0_input,
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(layer0_input))

    #2
    recg_layer.append(
        ConvMaxPool.ConvMaxPool(rng,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 3, 3),
                                poolsize=(1, 1),
                                border_mode='same',
                                activation=activation))
    if drops[1] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))
    #3
    recg_layer.append(
        ConvMaxPool.ConvMaxPool(rng,
                                image_shape=(batch_size, nkerns[1], 12, 12),
                                filter_shape=(nkerns[2], nkerns[1], 3, 3),
                                poolsize=(2, 2),
                                border_mode='valid',
                                activation=activation))
    if drops[2] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    #4
    recg_layer.append(
        ConvMaxPool.ConvMaxPool(rng,
                                image_shape=(batch_size, nkerns[2], 5, 5),
                                filter_shape=(nkerns[3], nkerns[2], 3, 3),
                                poolsize=(1, 1),
                                border_mode='same',
                                activation=activation))
    if drops[3] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))
    #5
    recg_layer.append(
        ConvMaxPool.ConvMaxPool(rng,
                                image_shape=(batch_size, nkerns[3], 5, 5),
                                filter_shape=(nkerns[4], nkerns[3], 3, 3),
                                poolsize=(1, 1),
                                border_mode='same',
                                activation=activation))
    if drops[4] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    mlp_input = cnn_output[-1].flatten(2)

    recg_layer.append(
        FullyConnected.FullyConnected(rng=rng,
                                      n_in=nkerns[4] * 5 * 5,
                                      n_out=500,
                                      activation=activation))

    feature = recg_layer[-1].drop_output(mlp_input, drop=drop, rng=rng_share)

    # classify the values of the fully-connected sigmoidal layer
    classifier = Pegasos.Pegasos(input=feature,
                                 rng=rng,
                                 n_in=500,
                                 n_out=10,
                                 weight_decay=0,
                                 loss=1)

    # the cost we minimize during training is the hinge loss of the model
    cost = classifier.hinge_loss(10, y, y_matrix) * batch_size
    weight_decay = 1.0 / n_train_batches

    # create a list of all model parameters to be fit by gradient descent
    params = []
    for r in recg_layer:
        params += r.params
    params += classifier.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)
    l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
    get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r,
                                                     decay1=0.1,
                                                     decay2=0.001,
                                                     weight_decay=weight_decay)
    updates = get_optimizer(params, grads)
    '''
    Save parameters and activations
    '''

    parameters = theano.function(
        inputs=[],
        outputs=params,
    )

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            drop: np.cast['int32'](0)
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            drop: np.cast['int32'](0)
        })

    train_model_average = theano.function(
        inputs=[index],
        outputs=[cost, classifier.errors(y)],
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            y_matrix:
            train_y_matrix[index * batch_size:(index + 1) * batch_size],
            drop: np.cast['int32'](dropout_flag)
        })

    train_model = theano.function(
        inputs=[index],
        outputs=[cost, classifier.errors(y)],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            y_matrix:
            train_y_matrix[index * batch_size:(index + 1) * batch_size],
            drop: np.cast['int32'](dropout_flag)
        })

    print '... training'
    # early-stopping parameters
    patience = n_train_batches * 100  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = np.inf
    best_test_score = np.inf
    test_score = 0.
    start_time = time.clock()
    epoch = 0
    decay_epochs = 150

    while (epoch < n_epochs):
        epoch = epoch + 1
        tmp1 = time.clock()

        minibatch_avg_cost = 0
        train_error = 0

        for minibatch_index in xrange(n_train_batches):

            co, te = train_model(minibatch_index)
            minibatch_avg_cost += co
            train_error += te
            #print minibatch_avg_cost
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:

                test_epoch = epoch - decay_epochs
                if test_epoch > 0 and test_epoch % 10 == 0:
                    print l_r.get_value()
                    with open(logdir + 'hook.txt', 'a') as f:
                        print >> f, l_r.get_value()
                    l_r.set_value(np.cast['float32'](l_r.get_value() / 3.0))

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)

                this_test_losses = [
                    test_model(i) for i in xrange(n_test_batches)
                ]
                this_test_score = np.mean(this_test_losses)

                train_thing = [
                    train_model_average(i) for i in xrange(n_train_batches)
                ]
                train_thing = np.mean(train_thing, axis=0)

                print epoch, 'hinge loss and training error', train_thing
                with open(logdir + 'hook.txt', 'a') as f:
                    print >> f, epoch, 'hinge loss and training error', train_thing

                if this_test_score < best_test_score:
                    best_test_score = this_test_score

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%'
                    % (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100, this_test_score * 100.))
                with open(logdir + 'hook.txt', 'a') as f:
                    print >> f, (
                        'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%'
                        % (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100, this_test_score * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of'
                           ' best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    with open(logdir + 'hook.txt', 'a') as f:
                        print >> f, (
                            ('     epoch %i, minibatch %i/%i, test error of'
                             ' best model %f %%') %
                            (epoch, minibatch_index + 1, n_train_batches,
                             test_score * 100.))

        if epoch % 50 == 0:
            model = parameters()
            for i in xrange(len(model)):
                model[i] = np.asarray(model[i]).astype(np.float32)
            np.savez(logdir + 'model-' + str(epoch), model=model)

        print 'hinge loss and training error', minibatch_avg_cost / float(
            n_train_batches), train_error / float(n_train_batches)
        print 'time', time.clock() - tmp1
        with open(logdir + 'hook.txt', 'a') as f:
            print >> f, 'hinge loss and training error', minibatch_avg_cost / float(
                n_train_batches), train_error / float(n_train_batches)
            print >> f, 'time', time.clock() - tmp1

    end_time = time.clock()
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
def svm_cva(dir,
            predir,
            start=0,
            end=500,
            learning_rate=3e-4,
            n_epochs=10000,
            dataset='./data/mnist.pkl.gz',
            batch_size=500):
    """
    Demonstrate stochastic gradient (Adam) optimization of a linear Pegasos
    SVM on pre-extracted features

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """
    ''' 
    Difference
    '''
    print start, end, learning_rate, batch_size

    datasets = datapy.load_data_gpu(dataset, have_matrix=True)

    _, train_set_y, train_y_matrix = datasets[0]
    _, valid_set_y, valid_y_matrix = datasets[1]
    _, test_set_y, test_y_matrix = datasets[2]

    train_set_x, valid_set_x, test_set_x = datapy.load_feature_gpu(dir=dir,
                                                                   start=start,
                                                                   end=end)
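    # load_feature_gpu presumably returns Theano shared variables holding the
    # extracted features (the columns [start, end) of the stored feature
    # matrices), so the minibatch `givens` below can index them on the GPU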

    print train_set_x.get_value().shape
    print valid_set_x.get_value().shape
    print test_set_x.get_value().shape

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels
    '''
    Differences
    '''

    y_matrix = T.imatrix(
        'y_matrix')  # labels, presented as 2D matrix of int labels

    # construct the Pegasos SVM classifier on the extracted features;
    # its input dimensionality is the width of the feature slice, not 28*28
    rng = np.random.RandomState(0)
    n_in = end - start
    classifier = Pegasos.Pegasos(input=x,
                                 rng=rng,
                                 n_in=n_in,
                                 n_out=10,
                                 weight_decay=1e-4,
                                 loss=1)
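    # the classifier is a linear multi-class SVM (Pegasos): n_in matches the
    # width of the loaded feature slice (end - start), and weight_decay / loss
    # are hyperparameters passed through to the hinge-loss objective below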

    # the cost we minimize during training is the Pegasos (hinge-loss)
    # objective of the classifier, in symbolic form
    cost = classifier.objective(10, y, y_matrix)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            #y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    params = [classifier.W, classifier.b]
    grads = [g_W, g_b]

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.

    l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
    #get_optimizer = optimizer.get_simple_optimizer(learning_rate=learning_rate)
    get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r,
                                                     decay1=0.1,
                                                     decay2=0.001)
    updates = get_optimizer(params, grads)
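    # l_r is a Theano shared variable, so the learning rate could later be
    # annealed at runtime with l_r.set_value(...) without recompiling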

    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=[cost],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            y_matrix:
            train_y_matrix[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-3
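    # note that y_matrix is only fed to train_model: the hinge-loss objective
    # needs the label matrix, while errors(y) in the validation and test
    # functions only needs the integer labels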

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch
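    # e.g. with the standard 50,000-example MNIST training split and
    # batch_size = 500, n_train_batches = 100 and validation_frequency =
    # min(100, 2500) = 100, i.e. validation runs once per epoch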

    best_validation_loss = np.inf
    best_test_score = np.inf
    test_score = 0.
    start_time = time.clock()

    logdir = dir + str(learning_rate) + '_c-'

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            #print minibatch_avg_cost
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)

                this_test_losses = [
                    test_model(i) for i in xrange(n_test_batches)
                ]
                this_test_score = np.mean(this_test_losses)

                if this_test_score < best_test_score:
                    best_test_score = this_test_score

                with open(logdir + 'hook.txt', 'a') as f:
                    print >> f, (
                        'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%'
                        % (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100, this_test_score * 100.))
                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%'
                    % (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100, this_test_score * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    with open(logdir + 'hook.txt', 'a') as f:
                        print >> f, (
                            ('     epoch %i, minibatch %i/%i, test error of'
                             ' best model %f %%') %
                            (epoch, minibatch_index + 1, n_train_batches,
                             test_score * 100.))

                    print(('     epoch %i, minibatch %i/%i, test error of'
                           ' best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    with open(logdir + 'hook.txt', 'a') as f:
        print >> f, (
            ('Optimization complete with best validation score of %f %%,'
             ' with test performance %f %%') %
            (best_validation_loss * 100., test_score * 100.))
        print >> f, 'The code ran for %d epochs, with %f epochs/sec' % (
            epoch, 1. * epoch / (end_time - start_time))
        print >> f, ('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.1fs' % ((end_time - start_time)))
        print >> f, best_test_score

    print(('Optimization complete with best validation score of %f %%,'
           ' with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
    print best_test_score

    if predir is not None:
        # output the result of the jointly trained classifier
        pre_train = np.load(predir + 'model-600.npz')
        pre_train = pre_train['model']
        pw = pre_train[-2]
        pb = pre_train[-1]
        params[0].set_value(pw)
        params[1].set_value(pb)
        ptest_losses = [test_model(i) for i in xrange(n_test_batches)]
        ptest_score = np.mean(ptest_losses)
        with open(logdir + 'hook.txt', 'a') as f:
            print >> f, 'Jointly trained classifier', ptest_score
        print 'Jointly trained classifier', ptest_score
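
# a minimal invocation sketch for the routine above (the feature path is
# hypothetical; predir=None skips the jointly trained classifier check):
# svm_cva(dir='./features/mnist_', predir=None, start=0, end=500,
#         learning_rate=3e-4, n_epochs=10000, batch_size=500)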

def c_6layer_mnist_imputation(seed=0, pertub_type=3, pertub_prob=6,
                              pertub_prob1=14, predir=None, n_batch=144,
                              dataset='mnist.pkl.gz', batch_size=500):

    """
    Missing data imputation
    """    
    #cp->cd->cpd->cd->c
    nkerns=[32, 32, 64, 64, 64]
    drops=[0, 0, 0, 0, 0, 1]
    #skerns=[5, 3, 3, 3, 3]
    #pools=[2, 1, 1, 2, 1]
    #modes=['same']*5
    n_hidden=[500, 50]
    drop_inverses=[1,]
    # 28->12->12->5->5/5*5*64->500->50->500->5*5*64/5->5->12->12->28
    
    if dataset=='mnist.pkl.gz':
        dim_input=(28, 28)
        colorImg=False
   
    train_set_x, test_set_x, test_set_x_pertub, pertub_label, pertub_number = \
        datapy.load_pertub_data(dirs='data_imputation/', pertub_type=pertub_type,
                                pertub_prob=pertub_prob, pertub_prob1=pertub_prob1)
    
    datasets = datapy.load_data_gpu(dataset, have_matrix=True)

    _, train_set_y, train_y_matrix = datasets[0]
    valid_set_x, valid_set_y, valid_y_matrix = datasets[1]
    _, test_set_y, test_y_matrix = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    #x_pertub = T.matrix('x_pertub')  # the data is presented as rasterized images
    #p_label = T.matrix('p_label')

    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels
    y_matrix = T.imatrix('y_matrix')

    drop = T.iscalar('drop')
    drop_inverse = T.iscalar('drop_inverse')
    
    activation = nonlinearity.relu

    rng = np.random.RandomState(seed)
    rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)

    input_x = x.reshape((batch_size, 1, 28, 28))
    
    recg_layer = []
    cnn_output = []

    #1
    recg_layer.append(ConvMaxPool.ConvMaxPool(
            rng,
            image_shape=(batch_size, 1, 28, 28),
            filter_shape=(nkerns[0], 1, 5, 5),
            poolsize=(2, 2),
            border_mode='valid',
            activation=activation
        ))
    if drops[0]==1:
        cnn_output.append(recg_layer[-1].drop_output(input=input_x, drop=drop, rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(input=input_x))

    #2
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 3, 3),
        poolsize=(1, 1),
        border_mode='same', 
        activation=activation
    ))
    if drops[1]==1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))
    
    #3
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[1], 12, 12),
        filter_shape=(nkerns[2], nkerns[1], 3, 3),
        poolsize=(2, 2),
        border_mode='valid', 
        activation=activation
    ))
    if drops[2]==1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    #4
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[2], 5, 5),
        filter_shape=(nkerns[3], nkerns[2], 3, 3),
        poolsize=(1, 1),
        border_mode='same', 
        activation=activation
    ))
    if drops[3]==1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))
    #5
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[3], 5, 5),
        filter_shape=(nkerns[4], nkerns[3], 3, 3),
        poolsize=(1, 1),
        border_mode='same', 
        activation=activation
    ))
    if drops[4]==1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    mlp_input = cnn_output[-1].flatten(2)
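    # flatten(2) keeps the batch axis and collapses each (nkerns[4], 5, 5)
    # stack of feature maps into a vector of length nkerns[4] * 5 * 5 = 1600
    # for the fully-connected layer below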

    recg_layer.append(FullyConnected.FullyConnected(
        rng=rng,
        n_in=nkerns[4] * 5 * 5,
        n_out=500,
        activation=activation
    ))

    feature = recg_layer[-1].drop_output(mlp_input, drop=drop, rng=rng_share)

    # classify the values of the fully-connected (ReLU) layer with a linear SVM
    classifier = Pegasos.Pegasos(input=feature, rng=rng, n_in=500, n_out=10, weight_decay=0, loss=1)

    # the cost we minimize during training is the multi-class hinge loss of the model
    cost = classifier.hinge_loss(10, y, y_matrix) * batch_size
    weight_decay=1.0/n_train_batches
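    # scaling the (presumably per-example averaged) hinge loss by batch_size
    # turns it into a minibatch sum; weight_decay = 1/n_train_batches spreads
    # the regularization penalty over the updates of one epoch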

    # create a list of all model parameters to be fit by gradient descent
    params=[]
    for r in recg_layer:
        params+=r.params
    params += classifier.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)
    learning_rate = 3e-4
    l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
    get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r,
                                                     decay1=0.1,
                                                     decay2=0.001,
                                                     weight_decay=weight_decay)
    updates = get_optimizer(params, grads)

    '''
    Save parameters and activations
    '''

    parameters = theano.function(
        inputs=[],
        outputs=params,
    )
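    # compiled helper with no inputs that returns the current parameter values,
    # e.g. for snapshotting the model to .npz files as in the training loops above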

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        }
    )

    test_pertub_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x_pertub[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        }
    )


    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            #y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        }
    )
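    # drop is clamped to 0 in all three evaluation functions, so dropout on the
    # fully-connected feature layer is disabled when measuring errors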

    
    #########################
    # LOAD PRETRAINED MODEL #
    #########################

    model_epoch = 250
    if 'model_epoch' in os.environ:
        model_epoch = int(os.environ['model_epoch'])
    if predir is not None:
        color.printBlue('... setting parameters')
        color.printBlue(predir)
        if model_epoch == -1:
            pre_train = np.load(predir+'best-model.npz')
        else:
            pre_train = np.load(predir+'model-'+str(model_epoch)+'.npz')
        pre_train = pre_train['model']
        for (para, pre) in zip(params, pre_train):
            para.set_value(pre)
    else:
        exit()
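    # without a pre-trained model there is nothing to evaluate or impute from,
    # hence the early exit above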

    ###############
    # TRAIN MODEL #
    ###############
    valid_losses = [validate_model(i) for i in xrange(n_valid_batches)]
    valid_score = np.mean(valid_losses)

    test_losses = [test_model(i) for i in xrange(n_test_batches)]
    test_score = np.mean(test_losses)

    test_losses_pertub = [test_pertub_model(i) for i in xrange(n_test_batches)]
    test_score_pertub = np.mean(test_losses_pertub)

    print valid_score, test_score, test_score_pertub