Example #1
def test_RMSLE():
    A=np.ones((2,1))
    B=np.ones((2,1))
    A[0,0]=2
    A[1,0]=2

    y,y_pred,yy=load_data.shared_dataset(A,B,sample_size=2)
    cost=cnn.RMSLE(y,y_pred)
    assert (cost.eval() - np.sqrt(( np.log(2) - np.log(3) )**2 ) < 0.001 )
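
The cost function cnn.RMSLE is not reproduced in this example. A minimal Theano sketch consistent with the expected value above (|log(2+1) - log(1+1)| for a target of 2 and a prediction of 1) might look like this; the log(x + 1) form is an assumption:

import theano.tensor as T

def RMSLE(y, y_pred):
    # Root mean squared logarithmic error:
    # sqrt(mean((log(y + 1) - log(y_pred + 1))^2))
    return T.sqrt(T.mean(T.sqr(T.log(y + 1.0) - T.log(y_pred + 1.0))))
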
Example #2
def test_MSE():
    A=np.zeros((2,1))
    B=np.zeros((2,1))
    A[0,0]=1
    A[1,0]=1

    y,y_pred,yy=load_data.shared_dataset(A,B,sample_size=2)
    cost=cnn.MSE(y,y_pred)
    assert (cost.eval() < 1.0001)
    assert (cost.eval() > 0.9999)
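
Similarly, cnn.MSE is not shown here. A plain mean squared error in Theano, which reproduces the expected value of 1.0 for targets of 1 and predictions of 0, could be sketched as:

import theano.tensor as T

def MSE(y, y_pred):
    # Mean squared error over all elements of the mini-batch
    return T.mean(T.sqr(y - y_pred))
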
Example #3
def test_shared_dataset():
    # Below AB is not tested, it returns list of input arrays
    A = np.ones((2, 2))
    B = np.ones((2, 1))
    C = np.ones((2, ))
    As, Bs, AB = load_data.shared_dataset(A, B, sample_size=2)
    As, Cs, AC = load_data.shared_dataset(A, C, sample_size=2)
    same = True
    for i in range(2):
        for j in range(2):
            if (np.abs(A[i, j] - As.get_value(borrow=True)[i, j]) > 0.01):
                same = False
    for i in range(2):
        if (np.abs(B[i, 0] - Bs.get_value(borrow=True)[i, 0]) > 0.01):
            same = False
    for i in range(2):
        if (np.abs(C[i] - Cs.get_value(borrow=True)[i]) > 0.01):
            same = False

    assert same
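
The load_data.shared_dataset helper exercised by this test is not reproduced here. A hypothetical sketch consistent with the test, returning the two inputs as Theano shared variables plus the list of original arrays, would be:

import numpy as np
import theano

def shared_dataset(data_x, data_y, sample_size=None, borrow=True):
    # Hypothetical sketch: wrap the (optionally truncated) arrays in
    # Theano shared variables so they can live in device memory.
    if sample_size is not None:
        data_x, data_y = data_x[:sample_size], data_y[:sample_size]
    shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX),
                             borrow=borrow)
    return shared_x, shared_y, [data_x, data_y]
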
Example #4
def TrainCNN():

    # Training, validation and test data
    valid_set_x, valid_set_y, valid_set = load_data.shared_dataset(
        datapar.Xval, datapar.Yval, sample_size=hyppar.Nval)
    train_set_x, train_set_y, train_set = load_data.shared_dataset(
        datapar.Xtrain, datapar.Ytrain, sample_size=hyppar.Ntrain)
    test_set_x = load_data.shared_testset(datapar.Xtest)

    # Hyperparameters
    learning_rate = hyppar.learning_rate
    num_epochs = hyppar.Nepoch
    num_filters = hyppar.Nchannel
    mini_batch_size = hyppar.mbs
    reg = hyppar.reg

    # Random set of validation-sample indices used to monitor activations
    rset = rd.sample(range(valid_set_x.get_value(borrow=True).shape[0]),
                     mini_batch_size)
    print(rset)
    # Seeding the random number generator
    rng = np.random.RandomState(23455)

    # Computing number of mini-batches
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= mini_batch_size
    n_valid_batches //= mini_batch_size
    n_test_batches //= mini_batch_size

    print('train: %d batches, validation: %d batches' %
          (n_train_batches, n_valid_batches))

    # mini-batch index
    mb_index = T.lscalar()
    # Coulomb matrices ( mini_batch_size x 80 x 80 matrix)
    x = T.matrix('x')
    # Target energies (1 x mini_batch_size)
    y = T.matrix('y')

    print('***** Constructing model ***** ')

    # Reshaping tensor of mini_batch_size set of images into a
    # 4-D tensor of dimensions: mini_batch_size x 1 x 80 x 80
    layer0_input = x.reshape((mini_batch_size, 1, 80, 80))

    # Define the CNN function
    E_pred, cn_output, params = CNNStructure(layer0_input, mini_batch_size,
                                             rng)

    # Cost minimised during stochastic gradient descent; includes L2 regularization
    cost = cnn.MSE(y, E_pred)

    L2_reg = 0
    for i in range(len(params)):
        L2_reg = L2_reg + T.mean(T.sqr(params[i][0]))

    cost = cost + reg * L2_reg

    # Creates a Theano function that computes the mistakes on the validation set.
    # This performs validation.

    # Note: the givens parameter allows us to separate the description of the
    # Theano model from the exact definition of the input variables. The 'key'
    # that is passed to the graph is substituted with the data from the givens
    # parameter. Here the model is built with regular Theano tensors, and
    # givens swaps in the slice of the shared dataset corresponding to the
    # requested mini-batch, avoiding repeated transfers to the GPU.

    # mb_index is the mini_batch_index
    valid_model = theano.function(
        [mb_index],
        cost,
        givens={
            x:
            valid_set_x[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size],
            y:
            valid_set_y[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size]
        })

    predict = theano.function(
        [mb_index],
        E_pred,
        givens={
            x:
            valid_set_x[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size]
        })

    test_model = theano.function(
        [mb_index],
        E_pred,
        givens={
            x:
            test_set_x[mb_index * mini_batch_size:(mb_index + 1) *
                       mini_batch_size]
        })

    get_activations = theano.function([],
                                      cn_output,
                                      givens={x: valid_set_x[rset]})

    # Create the list of parameter updates with the Adam optimiser, which
    # loops over all (param, gradient) pairs internally.
    updates = cnn.gradient_updates_Adam(cost, params, learning_rate)

    # Create a Theano function to train our convolutional neural network.
    train_model = theano.function(
        [mb_index],
        cost,
        updates=updates,
        givens={
            x:
            train_set_x[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size],
            y:
            train_set_y[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size]
        })

    iter = 0
    epoch = 0
    cost_ij = 0
    valid_losses = [valid_model(i) for i in range(n_valid_batches)]
    valid_score = np.mean(valid_losses)

    train_error = []
    valid_error = []

    statistics.saveParameters(params)

    # This is where we call the previously defined Theano functions.
    print('***** Training model *****')
    while (epoch < num_epochs):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            # Compute number of iterations performed or total number
            # of mini-batches executed.
            iter = (epoch - 1) * n_train_batches + minibatch_index

            # Perform the training of our convolution neural network.
            # Obtain the cost of each minibatch specified using the
            # minibatch_index.
            cost_ij = train_model(minibatch_index)

            if iter % 10 == 0:
                statistics.saveParameters(params)
            if iter % 2 == 0:
                activations = get_activations()
                statistics.saveActivations(activations)

            # Save training error
            train_error.append(float(cost_ij))

            valid_losses = [valid_model(i) for i in range(n_valid_batches)]
            # Compute the mean prediction error across all the mini-batches.
            valid_score = np.mean(valid_losses)
            # Save validation error
            valid_error.append(valid_score)

            print("Iteration: " + str(iter + 1) + "/" +
                  str(num_epochs * n_train_batches) + ", training error: " +
                  str(cost_ij) + ", validation error: " + str(valid_score))

            if (iter % 20 == 0):
                # Get predicted energies from validation set
                E = np.zeros((n_valid_batches * mini_batch_size, 1))
                step = 0
                for i in range(n_valid_batches):
                    buf = predict(i)
                    for j in range(mini_batch_size):
                        E[step, 0] = buf[j]
                        step = step + 1
                np.savetxt('output/E_pred_' + str(iter) + '.txt', E)

    # Predict energies for test set
    E_test = np.zeros((n_test_batches * mini_batch_size, 1))
    step = 0
    for i in range(n_test_batches):
        buf = test_model(i)
        for j in range(mini_batch_size):
            E_test[step, 0] = buf[j]
            step = step + 1

    statistics.writeActivations()
    # Save the final parameters and write the collected statistics to disk
    statistics.saveParameters(params)
    statistics.writeParameters()
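
TrainCNN relies on cnn.gradient_updates_Adam, which is not listed here. A sketch of a standard Adam update rule, assuming params is a flat list of Theano shared variables (the exact parameter layout used by the project is not shown), could be:

import numpy as np
import theano
import theano.tensor as T

def gradient_updates_Adam(cost, params, learning_rate,
                          beta1=0.9, beta2=0.999, eps=1e-8):
    # Sketch of Adam: per-parameter first/second moment estimates
    # with bias correction.
    updates = []
    t = theano.shared(np.asarray(0., dtype=theano.config.floatX))
    t_new = t + 1.
    for param in params:
        grad = T.grad(cost, param)
        m = theano.shared(np.zeros_like(param.get_value()))
        v = theano.shared(np.zeros_like(param.get_value()))
        m_new = beta1 * m + (1. - beta1) * grad
        v_new = beta2 * v + (1. - beta2) * T.sqr(grad)
        m_hat = m_new / (1. - beta1 ** t_new)
        v_hat = v_new / (1. - beta2 ** t_new)
        updates.append((m, m_new))
        updates.append((v, v_new))
        updates.append((param, param - learning_rate * m_hat / (T.sqrt(v_hat) + eps)))
    updates.append((t, t_new))
    return updates
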
Example #5
def cnnText(initial_learning_rate=0.0001,
            initial_momentum=0.5,
            n_epochs=100,
            dataset='mnist.pkl.gz',
            nkerns=[64, 64],
            batch_size=5):
    """ 
    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)
    print 'loading data'
    datasets = load_ag_news()
    print 'finished'
    train_x, train_y = datasets[0]
    #valid_set_x, valid_set_y = datasets[1]
    test_x, test_y = datasets[1]

    train_set_x, train_set_y = shared_dataset([train_x, train_y])
    test_set_x, test_set_y = shared_dataset([test_x, test_y])

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    #n_train_batches = 2000 / batch_size
    #n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    #n_valid_batches = 1000 / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
    #n_test_batches = 1000 / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels
    is_train = T.iscalar(
        'is_train'
    )  # pseudo boolean for switching between training and prediction

    print '... building the model'

    # Reshape the matrix of character-quantized documents of shape
    # (batch_size, total_len * 70) to a 4D tensor, compatible with ConvLayer.
    total_len = 231
    print 'total_len = ', total_len
    layer0_input = x.reshape((batch_size, 1, total_len, 70))

    layer_conv0 = ConvLayer(rng,
                            input=layer0_input,
                            image_shape=(batch_size, 1, total_len, 70),
                            filter_shape=(256, 1, 7, 1))

    layer_pool0 = PoolLayer(input=layer_conv0.output, poolsize=(3, 1))

    layer_conv1 = ConvLayer(rng,
                            input=layer_pool0.output,
                            image_shape=(batch_size, 256, (total_len - 6) / 3,
                                         70),
                            filter_shape=(256, 256, 7, 1))

    layer_pool1 = PoolLayer(input=layer_conv1.output, poolsize=(3, 1))

    layer_conv20 = ConvLayer(rng,
                             input=layer_pool1.output,
                             image_shape=(batch_size, 256,
                                          ((total_len - 6) / 3 - 6) / 3, 70),
                             filter_shape=(256, 256, 3, 1))

    layer_conv21 = ConvLayer(rng,
                             input=layer_conv20.output,
                             image_shape=(batch_size, 256,
                                          ((total_len - 6) / 3 - 6) / 3 - 2,
                                          70),
                             filter_shape=(256, 256, 3, 1))

    layer_conv22 = ConvLayer(rng,
                             input=layer_conv21.output,
                             image_shape=(batch_size, 256,
                                          ((total_len - 6) / 3 - 6) / 3 - 4,
                                          70),
                             filter_shape=(256, 256, 3, 1))

    layer_conv23 = ConvLayer(rng,
                             input=layer_conv22.output,
                             image_shape=(batch_size, 256,
                                          ((total_len - 6) / 3 - 6) / 3 - 6,
                                          70),
                             filter_shape=(256, 256, 3, 1))

    layer_pool2 = PoolLayer(input=layer_conv23.output, poolsize=(3, 1))

    layer_fc0_input = layer_pool2.output.flatten(2)

    layer_fc0 = fcLayer(rng,
                        is_train=is_train,
                        input=layer_fc0_input,
                        n_in=(((total_len - 6) / 3 - 6) / 3 - 8) / 3 * 70 *
                        256,
                        n_out=1024,
                        activation=ReLu,
                        dropout_rate=0.5)

    layer_fc1 = fcLayer(rng,
                        is_train=is_train,
                        input=layer_fc0.output,
                        n_in=1024,
                        n_out=1024,
                        activation=ReLu,
                        dropout_rate=0.5)

    # classify the values of the fully-connected sigmoidal layer
    layer_softmax = LogisticRegression(input=layer_fc1.output,
                                       n_in=1024,
                                       n_out=4)

    # the cost we minimize during training is the NLL of the model
    cost = layer_softmax.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer_softmax.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            is_train: numpy.cast['int32'](0)
        })
    '''
    validate_model = theano.function(
        [index],
        layer_softmax.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size],
            is_train: numpy.cast['int32'](0)
        }
    )
    '''
    # create a list of all model parameters to be fit by gradient descent
    params = layer_conv0.params + layer_conv1.params + layer_conv20.params + layer_conv21.params + layer_conv22.params + layer_conv23.params\
        + layer_fc0.params + layer_fc1.params + layer_softmax.params

    learning_rate = theano.shared(
        numpy.cast[theano.config.floatX](initial_learning_rate))
    initial_learning_rate_val = initial_learning_rate

    # momentum method
    assert initial_momentum >= 0. and initial_momentum < 1.

    momentum = theano.shared(
        numpy.cast[theano.config.floatX](initial_momentum), name='momentum')

    updates = []
    for param in params:
        param_update = theano.shared(param.get_value() *
                                     numpy.cast[theano.config.floatX](0.))
        updates.append((param, param - learning_rate * param_update))
        updates.append((param_update, momentum * param_update +
                        (numpy.cast[theano.config.floatX](1.) - momentum) *
                        T.grad(cost, param)))

    train_model = theano.function(
        [index],
        layer_softmax.errors(y),
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            is_train: numpy.cast['int32'](1)
        })

    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant

    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    train = []
    valid = []
    test = []
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        train_losses = 0.
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            train_losses = train_losses + train_model(minibatch_index)

            # test it on the test set
        test_losses = [test_model(i) for i in xrange(n_test_batches)]
        test_score = numpy.mean(test_losses)
        print(('     epoch %i, minibatch %i/%i, test error of '
               ' model %f %%') %
              (epoch, minibatch_index + 1, n_train_batches, test_score * 100.))
        test.append((epoch, minibatch_index + 1, n_train_batches,
                     test_score * 100.))  #

        print('epoch %i, training error %f %%' %
              (epoch, train_losses * 100. / n_train_batches))
        train.append(train_losses * 100)

        if momentum.get_value() < 0.99:
            new_momentum = 1. - (1. - momentum.get_value()) * 0.98
            momentum.set_value(numpy.cast[theano.config.floatX](new_momentum))
        # adaption of learning rate
        new_learning_rate = learning_rate.get_value() * 0.985
        #new_learning_rate = initial_learning_rate_val*1.1 /(1+0.1*epoch)
        #get from tutorial p48
        learning_rate.set_value(
            numpy.cast[theano.config.floatX](new_learning_rate))

    end_time = time.clock()
    print('Optimization complete.')
    print(
        'Best validation score of %f %% obtained at iteration %i, '
        'with test performance %f %%' %
        (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    print train
    print test

    f = file('log_BN1_wb.txt', 'wb')
    cPickle.dump((train, test), f)
    f.close()
    f1 = file('log_BN1_w.txt', 'w')
    cPickle.dump((train, test), f1)
    f1.close()
    '''
    params = [layer_conv0.W.get_value(), layer_conv0.b.get_value(),
        layer_conv1.W.get_value(), layer_conv1.b.get_value(),
        layer_conv2.W.get_value(), layer_conv2.b.get_value(),
        layer_conv3.W.get_value(), layer_conv3.b.get_value(),
        layer_fc0.W.get_value(), layer_fc0.b.get_value(),
        layer_fc1.W.get_value(), layer_fc1.b.get_value(),
        layer_softmax.W.get_value(), layer_softmax.b.get_value(),
        random_l.W.get_value()]
    '''
    f = file('bn0.save', 'wb')
    cPickle.dump(params, f)

    spath = 'mao_drop.txt'
    fii = open(spath, "w")
    cPickle.dump(best_validation_loss, fii)
    cPickle.dump(best_iter + 1, fii)
    cPickle.dump(test_score * 100., fii)
    cPickle.dump((end_time - start_time) / 60., fii)

    f.close()
    fii.close()
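
cnnText builds its shared datasets with shared_dataset([train_x, train_y]), and the labels must end up usable as int32 indices (y is declared as T.ivector). A sketch in the style of the Theano deep-learning tutorial, with the int32 cast as an assumption, is:

import numpy
import theano
import theano.tensor as T

def shared_dataset(data_xy, borrow=True):
    # Store the data in shared variables; labels are kept as floats on
    # the device and cast to int32 when used as class indices.
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                             borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')
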
Example #6
def evaluate(init_learning_rate=0.1, n_epochs=200,
                    datasets='Trace' ,nkerns=[256, 256], n_train_batch=10,
                    trans='euc', active_func=T.tanh, window_size = 0.2, 
                    ada_flag = False, pool_factor = 2, slice_ratio = 1
                    ):

    rng = numpy.random.RandomState(23455) #set random seed
    learning_rate = theano.shared(numpy.asarray(init_learning_rate,dtype=theano.config.floatX))
    #used for learning_rate decay
 
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    ori_len = datasets[3]
    slice_ratio = datasets[4]

    valid_num = valid_set_x.shape[0]
    increase_num = ori_len - int(ori_len * slice_ratio) + 1 # this can be used as the batch size
    print "increase factor is ", increase_num, ', ori len', ori_len
    valid_num_batch = valid_num / increase_num

    test_num = test_set_x.shape[0]
    test_num_batch = test_num / increase_num

    length_train = train_set_x.shape[1] #length after slicing.
    num_of_categories = int(train_set_y.max()) + 1
 
    window_size = int(length_train * window_size) if window_size < 1 else int(window_size)

    #*******set up the ma and ds********#
    ma_base,ma_step,ma_num   = 5, 6, 0
    ds_base,ds_step, ds_num  = 2, 1, 4

    ds_num_max = length_train / (pool_factor * window_size)
    ds_num = min(ds_num, ds_num_max)
    
    #*******set up the ma and ds********#

    (ma_train, ma_valid, ma_test , ma_lengths) = batch_movingavrg(train_set_x,
                                                    valid_set_x, test_set_x,
                                                    ma_base, ma_step, ma_num)
    (ds_train, ds_valid, ds_test , ds_lengths) = batch_downsample(train_set_x,
                                                    valid_set_x, test_set_x,
                                                    ds_base, ds_step, ds_num)
 
    #concatenate directly
    data_lengths = [length_train] 
    #downsample part:
    if ds_lengths != []:
        data_lengths +=  ds_lengths
        train_set_x = numpy.concatenate([train_set_x, ds_train], axis = 1)
        valid_set_x = numpy.concatenate([valid_set_x, ds_valid], axis = 1)
        test_set_x = numpy.concatenate([test_set_x, ds_test], axis = 1)

    #moving average part
    if ma_lengths != []:
        data_lengths += ma_lengths
        train_set_x = numpy.concatenate([train_set_x, ma_train], axis = 1)
        valid_set_x = numpy.concatenate([valid_set_x, ma_valid], axis = 1)
        test_set_x = numpy.concatenate([test_set_x, ma_test], axis = 1)

    train_set_x, train_set_y = shared_dataset(train_set_x, train_set_y)
    
    valid_set_x = shared_data_x(valid_set_x)
    test_set_x = shared_data_x(test_set_x)

    #compute number of minibatches for training, validation and testing
    n_train_size = train_set_x.get_value(borrow=True).shape[0]
    n_valid_size = valid_set_x.get_value(borrow=True).shape[0]
    n_test_size = test_set_x.get_value(borrow=True).shape[0]
    batch_size = n_train_size / n_train_batch
    n_train_batches = n_train_size / batch_size
    data_dim = train_set_x.get_value(borrow=True).shape[1]
    print 'train size', n_train_size, ',valid size', n_valid_size, ' test size', n_test_size
    print 'batch size ', batch_size
    print 'n_train_batches is ', n_train_batches
    print 'data dim is ', data_dim
    print '---------------------------'

    # allocate symbolic variables for the data
    index = T.lscalar('index')  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   
    y = T.ivector('y')  
                       
    x_vote = T.matrix('xvote')   # the data is presented as rasterized images
    #y_vote = T.ivector('y_vote')  # the labels are presented as 1D vector of

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print 'building the model...'

    # Reshape the matrix of (multi-scale) time series of shape
    # (batch_size, data_dim) to a 4D tensor, compatible with ShapeletPoolLayer.
    layer0_input = []
    inputs = x.reshape((batch_size, 1, data_dim, 1))
    
    layer0_input_vote = []
    inputs_vote = x_vote.reshape((increase_num, 1, data_dim, 1))
    ind = 0
    for i in xrange(len(data_lengths)):
        layer0_input.append(inputs[:,:,ind : ind + data_lengths[i],:])
        layer0_input_vote.append(inputs_vote[:,:,ind : ind + data_lengths[i],:])
        ind += data_lengths[i]

    layer0 = []
    layer0_vote = []
    feature_map_size = 0

    for i in xrange(len(layer0_input)):
        pool_size = (data_lengths[i] - window_size + 1) / pool_factor 
        feature_map_size += (data_lengths[i] - window_size + 1) / pool_size

        layer0.append(ShapeletPoolLayer(
            numpy.random.RandomState(23455 + i),
            input=layer0_input[i],
            image_shape=(batch_size, 1, data_lengths[i], 1),
            filter_shape=(nkerns[0], 1, window_size, 1),
            poolsize=(pool_size , 1),
            trans = trans,
            active_func=active_func
        ))
        layer0_vote.append(ShapeletPoolLayer(
            numpy.random.RandomState(23455 + i),
            input=layer0_input_vote[i],
            image_shape=(increase_num, 1, data_lengths[i], 1),
            filter_shape=(nkerns[0], 1, window_size, 1),
            poolsize=(pool_size , 1),
            W = layer0[i].W,
            trans = trans,
            active_func=active_func
        ))

    layer1_input = layer0[0].output.flatten(2)
    layer1_vote_input = layer0_vote[0].output.flatten(2)
    for i in xrange(1, len(data_lengths)):
        layer1_input = T.concatenate([layer1_input, layer0[i].output.flatten(2)], axis = 1)
        layer1_vote_input = T.concatenate([layer1_vote_input, layer0_vote[i].output.flatten(2)], axis = 1)

    # construct a fully-connected sigmoidal layer
    layer1 = HiddenLayer(
        rng,
        input=layer1_input,
        n_in=nkerns[0] * feature_map_size,
        n_out=nkerns[1],
        activation=active_func,
        previous_layer = None
    )
    # construct a fully-connected sigmoidal layer for prediction
    layer1_vote = HiddenLayer(
        rng,
        input=layer1_vote_input,
        n_in=nkerns[0] * feature_map_size,
        n_out=nkerns[1],
        activation=active_func,
        previous_layer = None,
        W = layer1.W,
        b = layer1.b
    )

    # classify the values of the fully-connected sigmoidal layer
    layer2 = LogisticRegression(input=layer1.output, n_in=nkerns[1], n_out= num_of_categories , previous_layer = None)
    layer2_vote = LogisticRegressionVote(input=layer1_vote.output, n_in=nkerns[1], n_out= num_of_categories , previous_layer = None, W = layer2.W, b = layer2.b)

    # the cost we minimize during training is the NLL of the model
    cost = layer2.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer2_vote.prediction(),
        givens={
            x_vote : test_set_x[index * (increase_num) : (index + 1) * (increase_num)]
        }
    )
    # function for validation set. Return the prediction value
    validate_model = theano.function(
        [index],
        layer2_vote.prediction(),
        givens={
            x_vote : valid_set_x[index * (increase_num) : (index + 1) * (increase_num)]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer2.params + layer1.params
    for i in xrange(len(layer0_input)):
        params += layer0[i].params

    # Adagradient part
    grads = T.grad(cost, params)
    import copy
    G = [] 
    for i in xrange(2 + len(layer0_input)):
        G.append( theano.shared(
            numpy.zeros(params[i].shape.eval(), dtype=theano.config.floatX
            ),
            borrow=True
        ))

    # parameter update methods
    if ada_flag == True:
        updates = [
            (param_i, param_i -  learning_rate * (grad_i / (T.sqrt(G_i) + 1e-5) ))
            for param_i, grad_i, G_i in zip(params, grads, G)
        ]
    else:
        updates = [
            (param_i, param_i -  learning_rate * grad_i )
            for param_i, grad_i in zip(params, grads)
        ]
 
    update_G = theano.function(inputs=[index], outputs = G,
            updates=[(G_i, G_i  + T.sqr(grad_i) )
            for G_i, grad_i in zip(G,grads)],
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            }
            )
    reset_G = theano.function(inputs=[index],outputs = G,
            updates=[(G_i, grad_i - grad_i) 
            for G_i, grad_i in zip(G,grads)],
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            }
            )       

    #Our training function, return value: NLL cost and training error
    train_model = theano.function(
        [index],
        [cost, layer2.errors(y)],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    decrease_learning_rate = theano.function(inputs=[], outputs = learning_rate,
            updates={learning_rate: learning_rate * 1e-4})
    
    ###############
    # TRAIN MODEL #
    ###############
    print 'training...'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    best_test_iter = 0
    best_test_loss = numpy.inf
    test_patience = 200
    valid_loss = 0.
    test_loss = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    last_train_err = 1
    last_avg_err = float('inf')
    first_layer_prev = 0
    num_no_update_epoch = 0
    epoch_avg_cost = float('inf')
    epoch_avg_err = float('inf')

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        epoch_train_err = 0.
        epoch_cost = 0.
        if ada_flag:
            reset_G(0)
        num_no_update_epoch += 1
        if num_no_update_epoch == 500:
            break
        for minibatch_index in xrange(n_train_batches):

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if ada_flag:
                update_G(minibatch_index)
            
            [cost_ij,train_err] = train_model(minibatch_index)
            
            epoch_train_err = epoch_train_err + train_err
            epoch_cost = epoch_cost + cost_ij
            
            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                # validation set loss
                valid_results = [validate_model(i) for i in xrange(valid_num_batch)]
                valid_losses = []
                for i in xrange(valid_num_batch):
                    y_pred = valid_results[i]
                    label = valid_set_y[i * increase_num]
                    unique_value, sub_ind, correspond_ind, count = numpy.unique(y_pred, True, True, True)
                    unique_value = unique_value.tolist()
                    curr_err = 1.
                    if label in unique_value:
                        target_ind = unique_value.index(label)
                        count = count.tolist()
                        sorted_count = sorted(count)
                        if count[target_ind] == sorted_count[-1]:
                            if len(sorted_count) > 1 and sorted_count[-1] == sorted_count[-2]:
                                curr_err = 0.5 #tie
                            else:
                                curr_err = 0.
                    valid_losses.append(curr_err)
                valid_loss = sum(valid_losses) / float(len(valid_losses)) 

                print('...epoch %i, valid err: %.5f |' %
                      (epoch, valid_loss)),

                # if we got the best validation score until now
                if valid_loss <= best_validation_loss:
                    num_no_update_epoch = 0

                    #improve patience if loss improvement is good enough
                    if valid_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = valid_loss
                    best_iter = iteration

                    # test it on the test set
                    test_results = [test_model(i) for i in xrange(test_num_batch)]
                    test_losses = []
                    for i in xrange(test_num_batch):
                        y_pred = test_results[i]
                        label = test_set_y[i * increase_num]
                        unique_value, sub_ind, correspond_ind, count = numpy.unique(y_pred, True, True, True)
                        unique_value = unique_value.tolist()
                        curr_err = 1
                        if label in unique_value:
                            target_ind = unique_value.index(label)
                            count = count.tolist()
                            sorted_count = sorted(count)
                            if count[target_ind] == sorted_count[-1]:
                                if len(sorted_count) > 1 and sorted_count[-1] == sorted_count[-2]:
                                    curr_err = 0.5 # tie
                                else:
                                    curr_err = 0.
                        test_losses.append(curr_err)
                    test_loss = sum(test_losses) / float(len(test_losses)) 
                    print(('test err: %.5f |') %
                          (test_loss)),

                    best_test_loss = test_loss
                    test_patience = 200

            #test_patience -= 1 
            #if test_patience <= 0:
            #    break
            
            if patience <= iteration:
                done_looping = True
                break

        epoch_avg_cost = epoch_cost/n_train_batches
        epoch_avg_err = epoch_train_err/n_train_batches
        #curr_lr = decrease_learning_rate()
        last_avg_err = epoch_avg_cost
 
        print ('train err %.5f, cost %.4f' %(epoch_avg_err,epoch_avg_cost))
        if epoch_avg_cost == 0:
             break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test error: %f %%' %
          (best_validation_loss * 100., best_iter + 1, best_test_loss * 100.))
    print('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    return best_validation_loss
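
The validation and test loops above repeat the same majority-vote rule over the sliced copies of each series. The logic can be summarised in a small helper (a refactoring sketch, not part of the original code):

import numpy

def vote_error(y_pred, label):
    # Error for a single original series: 0 if `label` wins the vote over
    # its slices, 0.5 on a tie for first place, 1 otherwise.
    unique_value, count = numpy.unique(y_pred, return_counts=True)
    unique_value = unique_value.tolist()
    count = count.tolist()
    if label not in unique_value:
        return 1.
    sorted_count = sorted(count)
    target_count = count[unique_value.index(label)]
    if target_count == sorted_count[-1]:
        if len(sorted_count) > 1 and sorted_count[-1] == sorted_count[-2]:
            return 0.5  # tie between the winning labels
        return 0.
    return 1.
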
Example #7
File: lenet.py  Project: hphp/Kaggle
def train_by_lenet5(tr_start_index, tr_limit, vl_start_index, vl_limit, output_filename="tmp.file", learning_rate=0.13, n_epochs=5000):

    global train_dataset_route
    global valid_dataset_route

    output_file = open(output_filename, 'w')

    print train_dataset_route, type(train_dataset_route)
    """
    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    train_set = tdtf.read_data_patch_to_ndarray(train_dataset_route, tr_start_index, tr_limit)
    datasets = load_data.shared_dataset(train_set)
    train_set_x, train_set_y = datasets

    valid_set = tdtf.read_data_patch_to_ndarray(valid_dataset_route, vl_start_index, vl_limit)
    print valid_set[1]
    datasets = load_data.shared_dataset(valid_set)
    valid_set_x, valid_set_y = datasets

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size

    # allocate symbolic variables for the data

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    validate_model = theano.function([index], layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function([index], [cost, layer3.errors(y), layer3.params[0][0][0]], updates=updates,
          givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 50000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    min_train_cost = 10000
    decreasing_num = 0

    last_train_err = 1
    last_train_cost = 1

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter , ' patience = ' , patience
            cost_ij, train_err, par = train_model(minibatch_index)
            
            decreasing_rate = (last_train_err - train_err) / (last_train_err) * 100.
            last_train_err = train_err
            if last_train_err == 0:
                last_train_err += 0.0000001
            c_d_rate = (last_train_cost - cost_ij) / (last_train_cost) * 100.
            last_train_cost = cost_ij 
            print >> output_file, ('epoch %i, minibatch %i/%i, train_cost %f, train_error %.2f %%, decreasing rate %f %%, cost_decreasing rate %f %%, W00 %s' % \
                (epoch, minibatch_index + 1, n_train_batches,
                 cost_ij,
                 train_err * 100.,
                 decreasing_rate,
                 c_d_rate,
                 par))

            #print layer1.params[0:1][0][0:3]
            #print layer2.params[0:1][0][0:3]
            if cost_ij < min_train_cost:
                decreasing_num = 0
                min_train_cost = cost_ij
                layer0_state = layer0.__getstate__()
                layer1_state = layer1.__getstate__()
                layer2_state = layer2.__getstate__()
                layer3_state = layer3.__getstate__()
                trained_model_list = [layer0_state, layer1_state, layer2_state, layer3_state]
                trained_model_array = numpy.asarray(trained_model_list)
                classifier_file = open(train_model_route, 'w')
                cPickle.dump([1,2,3], classifier_file, protocol=2)
                numpy.save(classifier_file, trained_model_array)
                classifier_file.close()
            else:
                print "decreasing"
                decreasing_num += 1
                if decreasing_num > 100:
                    done_looping = True
                    break
            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print >> output_file, ('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
            if patience <= iter:
                done_looping = True
                print patience , iter
                break

    end_time = time.clock()
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    print >> output_file, ('Optimization complete.')
    print >> output_file, ('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
          
    output_file.close()
Example #8
    def fit(self, X, Y):
        # Create a report to be saved at the end of execution 
        # (when running on the remote server)
        if self.do_report:
            report = {"learning_rate":self.learning_rate,
                      "training_epochs":self.training_epochs,
                      "batch_size":self.batch_size,
                      "n_chains":self.n_chains,
                      "n_samples":self.n_samples,
                      "n_hidden":self.n_hidden,
                      "k":self.k,
                      "costs":np.zeros(self.training_epochs),
#                      "accuracy":np.zeros(self.training_epochs),
                      "pretraining_time":0}
                      
        train_data = np.hstack([Y,X])
        
        n_visible = train_data.shape[1]
        
        # Building of theano format datasets
        train_set = shared_dataset(train_data)
        
        # compute number of minibatches for training, validation and testing
        n_train_batches = train_set.get_value(borrow=True).shape[0] / \
            self.batch_size
        
        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
        x = T.matrix('x')  # the data
        
        rng = np.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2 ** 30))
        
        # initialize storage for the persistent chain (state = hidden
        # layer of chain)
        persistent_chain = theano.shared(np.zeros((self.batch_size, 
                                                   self.n_hidden),
                                                  dtype=theano.config.floatX),
                                         borrow=True)
        
        # construct the RBM class
        self.rbm = RBM(input=x,
                       n_visible=n_visible,
                       n_labels=self.n_labels,
                       n_hidden=self.n_hidden, 
                       np_rng=rng, 
                       theano_rng=theano_rng)
        
        # get the cost and the gradient corresponding to one step of CD-k
        cost, updates = self.rbm.get_cost_updates(lr=self.learning_rate,
                                                  persistent=persistent_chain, 
                                                  k=self.k)
                                             
#        accuracy = self.rbm.get_cv_error()
        
        #%%====================================================================
        # Training the RBM
        #======================================================================
        
        # it is ok for a theano function to have no output
        # the purpose of train_rbm is solely to update the RBM parameters
        train_rbm = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x: train_set[index * self.batch_size: \
                            (index + 1) * self.batch_size]
            },
            name='train_rbm'
        )
        
        start_time = timeit.default_timer()
    
        max_score = -np.inf
        argmax_score = RBM(input=x,
                           n_visible=n_visible,
                           n_labels=self.n_labels,
                           n_hidden=self.n_hidden, 
                           np_rng=rng, 
                           theano_rng=theano_rng)
#        count = 0
        
        ## go through training epochs
        for epoch in xrange(self.training_epochs):
        
            # go through the training set
            mean_cost = []
            for batch_index in xrange(n_train_batches):
                mean_cost += [train_rbm(batch_index)]
                
            print 'Training epoch %d, cost is ' % epoch, np.mean(mean_cost)
            
            score = np.mean(mean_cost)

            if score>max_score:
                max_score = score
                argmax_score.clone(self.rbm)
            
#            acc = accuracy.eval()
#            
#            if self.scoring=='cost':
#                score = np.mean(mean_cost)
#            elif self.scoring=='accuracy':
#                score = acc
#            else:
#                raise Warning('''scoring must be cost or accuracy, 
#                              set to accuracy''')
#                score = acc
#                
#            if score>max_score:
#                max_score = score
#                argmax_score.clone(self.rbm)
#                count = 0
#            else:
#                count += 1
#            
#            if count>2:
#                break
                
            if self.do_report:
                report["costs"][epoch] = np.mean(mean_cost)
#                report["accuracy"][epoch] = acc
         
        end_time = timeit.default_timer()
        pretraining_time = (end_time - start_time)
        report['pretraining_time'] = pretraining_time   
        
        self.rbm = argmax_score        
        
        if self.do_report:
            try:
                np.save(self.report_folder+'/'+self.report_name, report)
            except OSError:
                os.mkdir(self.report_folder)
                np.save(self.report_folder+'/'+self.report_name, report)
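
fit() keeps the best model by calling argmax_score.clone(self.rbm). The RBM class itself is not shown; a hypothetical equivalent of that call, assuming the W / hbias / vbias shared variables of the standard Theano RBM tutorial, could look like:

def clone_rbm(dst, src):
    # Hypothetical helper equivalent to dst.clone(src): copy parameter
    # values from one RBM into another with the same architecture.
    dst.W.set_value(src.W.get_value(borrow=False))
    dst.hbias.set_value(src.hbias.get_value(borrow=False))
    dst.vbias.set_value(src.vbias.get_value(borrow=False))
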
Example #9
def evaluate_lenet5(dataset_route=DataHome+"DogVsCat_test_feature_2500.csv", \
                    nkerns=[20, 50], batch_size=5):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    trained_model_pkl = open(ModelHome + train_model_route, 'r')
    trained_model_state_list = cPickle.load(trained_model_pkl)
    trained_model_state_array = numpy.load(trained_model_pkl)
    layer0_state, layer1_state, layer2_state, layer3_state = trained_model_state_array

    test_set = tdtf.read_data_to_ndarray(dataset_route, limit=None, header_n=0)
    test_set_x, id_arr = test_set
    datasets = load_data.shared_dataset(test_set)
    test_set_x, test_set_y = datasets
    print test_set_x.shape, test_set_y.shape

    # compute number of minibatches for training, validation and testing
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ishape = (50, 50)  # this is the size of the input images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 50*50)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 50, 50))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (50-10+1, 50-10+1) = (41, 41)
    # maxpooling reduces this further to (41/2, 41/2) = (20, 20)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 20, 20)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input, \
            image_shape=(batch_size, 1, 50, 50), \
            filter_shape=(nkerns[0], 1, 10, 10), poolsize=(2, 2), \
            W=layer0_state[0], b=layer0_state[1] \
            )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (20-5+1, 20-5+1) = (16, 16)
    # maxpooling reduces this further to (16/2, 16/2) = (8, 8)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 8, 8)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, nkerns[0], 20, 20),
            filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2), \
            W=layer1_state[0], b=layer1_state[1] \
            )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 8 * 8)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 8 * 8,
                         n_out=100, activation=T.tanh,\
                         W=layer2_state[0], b=layer2_state[1] \
                         )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=100, n_out=2, \
                                    W=layer3_state[0], b=layer3_state[1] \
                                )

    print "predicting"
    start_time = time.clock()
    # create a function to compute the mistakes that are made by the model
    test_results = theano.function(
        inputs=[index],
        outputs=layer3.y_pred,
        givens={x: test_set_x[index * batch_size:(index + 1) * batch_size]})

    test_res = [test_results(i) for i in xrange(n_test_batches)]
    print test_res

    id_l = []
    label_l = []
    index = 0
    for arr in test_res:
        for label in arr:
            label_l.append(label)
            id_l.append(id_arr[index])
            index += 1
    tdtf.wr_to_csv(header=['id', 'label'],
                   id_list=id_l,
                   pred_list=label_l,
                   filename=test_label_route)
    end_time = time.clock()
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Example #11
def visualize_MISTtraining():
    '''
    A function to demonstrate how convolutional and fully
    connected layers are used to train a CNN to label MNIST
    digits.

    The same function is used in testing, without any output.

    Downloads the data if the mnist.pkl.gz file is not present.

    More plotting features should be added; for now there is only
    terminal output.

    Benchmark error on the test set with the current settings: 0.0445
    '''
    dataset = 'mnist.pkl.gz'
    data_dir, data_file = os.path.split(dataset)
    rd.seed(23455)
    # Check if data file present
    if data_dir == "" and not os.path.isfile(dataset):
        new_path = os.path.join('', dataset)
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    # Download the file from MILA if not present
    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz')
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    print('***** Loading data *****')
    # Open the file
    with gzip.open(dataset, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except:
            train_set, valid_set, test_set = pickle.load(f)

    train_x, train_y = train_set
    valid_x, valid_y = valid_set
    test_x, test_y = test_set

    # Load data into tensors
    train_size = 6000
    test_set_x, test_set_y_float, test_set = load_data.shared_dataset(
        test_x, test_y, sample_size=train_size // 3)
    valid_set_x, valid_set_y_float, valid_set = load_data.shared_dataset(
        valid_x, valid_y, sample_size=train_size // 3)
    train_set_x, train_set_y_float, train_set = load_data.shared_dataset(
        train_x, train_y, sample_size=train_size)

    train_set_y = T.cast(train_set_y_float, 'int32')
    valid_set_y = T.cast(valid_set_y_float, 'int32')
    test_set_y = T.cast(test_set_y_float, 'int32')
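    # (shared_dataset stores its arrays as floatX so they can be placed in GPU
    #  memory; the labels are cast back to int32 here for use as class indices)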

    # Training set dimension: 6000 x 784
    print('Training set: %d samples' %
          (train_set_x.get_value(borrow=True).shape[0]))
    # Test set dimension: 2000 x 784
    print('Test set: %d samples' %
          (test_set_x.get_value(borrow=True).shape[0]))
    # Validation set dimension: 2000 x 784
    print('Validation set: %d samples' %
          (valid_set_x.get_value(borrow=True).shape[0]))
    print('The training set looks like this: ')
    print(train_set[0])
    print('The labels look like this:')
    print(train_set[1])

    # set learning rate used for Stochastic Gradient Descent
    learning_rate = 0.005
    # set number of training epochs
    num_epochs = 4
    # set number of kernels for each convolution layer
    # e.g. for 2 layers, [20, 50] means layer 1 has 20 kernels and layer 2 has 50
    num_filters = [9]
    # set mini-batch size to be used
    mini_batch_size = 50

    # Seeding the random number generator
    rng = np.random.RandomState(23455)

    # Computing number of mini-batches
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= mini_batch_size
    n_valid_batches //= mini_batch_size
    n_test_batches //= mini_batch_size

    print('train: %d batches, test: %d batches, validation: %d batches' %
          (n_train_batches, n_test_batches, n_valid_batches))

    # mini-batch index
    mb_index = T.lscalar()
    # rasterised images
    x = T.matrix('x')
    # image labels
    y = T.ivector('y')

    print('***** Constructing model ***** ')

    # Reshaping matrix of mini_batch_size set of images into a
    # 4-D tensor of dimensions: mini_batch_size x 1 x 28 x 28
    layer0_input = x.reshape((mini_batch_size, 1, 28, 28))

    # First convolution and pooling layer
    # 4D output tensor is of shape:
    # mini_batch_size x 9 x 12 x 12
    [layer0_output,
     layer0_params] = cnn.convLayer(rng,
                                    data_input=layer0_input,
                                    image_spec=(mini_batch_size, 1, 28, 28),
                                    filter_spec=(num_filters[0], 1, 5, 5),
                                    pool_size=(2, 2),
                                    activation=T.tanh)

    # Flatten the output into dimensions:
    # mini_batch_size x 1296
    fc_layer_input = layer0_output.flatten(2)

    # The fully connected layer operates on a matrix of
    # dimensions: mini_batch_size x 1296
    # Its linear output is classified with the softmax function below.
    [y_lin, fc_layer_params
     ] = cnn.fullyConnectedLayer(rng,
                                 data_input=fc_layer_input,
                                 num_in=num_filters[0] * 12 * 12,
                                 num_out=10)

    # The likelihood of the categories
    p_y_given_x = T.nnet.softmax(y_lin)
    # Predictions
    y_pred = T.argmax(p_y_given_x, axis=1)

    # Cost that is minimised during stochastic descent.
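    # (negative_log_lik is assumed to return the mean negative log-likelihood
    #  of the true labels, i.e. -mean(log(p_y_given_x[i, y[i]])) over the batch)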
    cost = negative_log_lik(y=y, p_y_given_x=p_y_given_x)

    # Creates a Theano function that computes the mistakes on the validation set.
    # This performs validation.
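    # (errors(y, y_pred) is assumed to return the mean zero-one loss, i.e. the
    #  fraction of mini-batch samples for which y_pred differs from y)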

    # Note: the givens parameter allows us to separate the description of the
    # Theano model from the exact definition of the inputs variable. The 'key'
    # that is passed to the graph is substituted with the data from the givens
    # parameter. In this demo we built the model with a regular Theano tensor
    # and use givens to swap in the slice of the shared dataset corresponding
    # to the requested mini-batch, which keeps the data on the GPU.

    # mb_index is the mini_batch_index
    valid_model = theano.function(
        [mb_index],
        errors(y, y_pred),
        givens={
            x:
            valid_set_x[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size],
            y:
            valid_set_y[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size]
        })

    # Create a Theano function that computes the mistakes on the test set.
    # This evaluates the model's error rate.
    test_model = theano.function(
        [mb_index],
        errors(y, y_pred),
        givens={
            x:
            test_set_x[mb_index * mini_batch_size:(mb_index + 1) *
                       mini_batch_size],
            y:
            test_set_y[mb_index * mini_batch_size:(mb_index + 1) *
                       mini_batch_size]
        })

    # List of parameters to be fit during training
    params = fc_layer_params + layer0_params
    # Creates a list of gradients
    grads = T.grad(cost, params)

    # Create the list of parameter updates. A plain SGD rule would loop over
    # all (params[i], grads[i]) pairs:
    #updates = [(param_i, param_i - learning_rate * grad_i)
    #           for param_i, grad_i in zip(params, grads)]
    # Here Adam updates are used instead (see the commented sketch below).

    updates = cnn.gradient_updates_Adam(cost, params, learning_rate)
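
    # A rough, commented sketch of what an Adam update helper such as
    # cnn.gradient_updates_Adam typically computes. The actual implementation
    # is not shown in this file; the beta1/beta2/epsilon defaults below are
    # assumptions, not the library's values.
    #
    # def gradient_updates_Adam(cost, params, lr, b1=0.9, b2=0.999, eps=1e-8):
    #     grads = T.grad(cost, params)
    #     t = theano.shared(np.asarray(1., dtype=theano.config.floatX))
    #     updates = []
    #     for p, g in zip(params, grads):
    #         zeros = np.zeros(p.get_value().shape, dtype=theano.config.floatX)
    #         m = theano.shared(zeros)          # first-moment (mean) estimate
    #         v = theano.shared(zeros.copy())   # second-moment estimate
    #         m_new = b1 * m + (1. - b1) * g
    #         v_new = b2 * v + (1. - b2) * g ** 2
    #         m_hat = m_new / (1. - b1 ** t)    # bias-corrected estimates
    #         v_hat = v_new / (1. - b2 ** t)
    #         updates += [(m, m_new), (v, v_new),
    #                     (p, p - lr * m_hat / (T.sqrt(v_hat) + eps))]
    #     updates.append((t, t + 1.))
    #     return updates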

    # Create a Theano function to train our convolutional neural network.
    train_model = theano.function(
        [mb_index],
        cost,
        updates=updates,
        givens={
            x:
            train_set_x[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size],
            y:
            train_set_y[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size]
        })

    # Counters and containers for the training loop and
    # for the plots produced at the end.
    iter = 0
    epoch = 0
    cost_ij = 0

    train_costs = []
    valid_errors = []
    # This is where we call the previously defined Theano functions.
    print('***** Training model *****')
    while (epoch < num_epochs):
        print('epoch: ' + str(epoch))
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            # Compute number of iterations performed or total number
            # of mini-batches executed.
            iter = (epoch - 1) * n_train_batches + minibatch_index

            # Perform the training of our convolution neural network.
            # Obtain the cost of each minibatch specified using the
            # minibatch_index.
            cost_ij = train_model(minibatch_index)
            print('iter: ' + str(iter) + ', cost_ij: ' + str(cost_ij))
            train_costs.append(cost_ij)
        # Compute the prediction error on each validation mini-batch by
        # calling the previously defined Theano function valid_model.

        valid_losses = [valid_model(i) for i in range(n_valid_batches)]

        # Compute the mean prediction error across all the mini-batches.
        valid_score = np.mean(valid_losses)
        valid_errors.append(valid_score)

    print('***** Training Complete *****')

    test_losses = [test_model(i) for i in range(n_test_batches)]
    # Compute the mean prediction error across all the mini-batches.
    test_score = np.mean(test_losses)

    print('Error on the test set: ' + str(test_score))

    fig, (ax1, ax2) = plt.subplots(2)
    ax1.plot(train_costs)
    ax2.plot(valid_errors)
    plt.show()
Ejemplo n.º 12
0
def test_convLayer():
    '''
    Constructs a CNN with one convolutional and one fully connected layer.
    The function then trains the network to classify MNIST digits. The same
    script, with output and plotting features, is found in "test.py".

    Uses the cost and accuracy helper functions defined above.

    Test: digit labeling accuracy > 92%

    NOTE: The validation set is not used here.
    '''

    dataset = 'mnist.pkl.gz'
    data_dir, data_file = os.path.split(dataset)
    rd.seed(23455)
    # Check if data file present
    if data_dir == "" and not os.path.isfile(dataset):
        new_path = os.path.join('', dataset)
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    # Download the file from MILA if not present                                                                 
    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
            )
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    print('***** Loading data *****')
    # Open the file                                                                                              
    with gzip.open(dataset, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except:
            train_set, valid_set, test_set = pickle.load(f)

    train_x,train_y=train_set
    test_x,test_y=test_set

    # Load data into tensors                                                                                     
    train_size = 6000
    test_set_x, test_set_y_float, test_set = load_data.shared_dataset(
        test_x,test_y,
        sample_size=train_size//3
        )
    train_set_x, train_set_y_float, train_set = load_data.shared_dataset(
        train_x,train_y,
        sample_size=train_size
        )

    train_set_y=T.cast(train_set_y_float,'int32')
    test_set_y=T.cast(test_set_y_float,'int32')

    # set learning rate used for Stochastic Gradient Descent                                                     
    learning_rate = 0.005
    # set number of training epochs                                                                              
    num_epochs = 4
    # set number of kernels for each convolution layer                                                           
    # e.g. for 2 layers, [20, 50] means layer 1 has 20 kernels and layer 2 has 50
    num_filters = [9]
    # set mini-batch size to be used                                                                             
    mini_batch_size = 50


    # Seeding the random number generator                                                                        
    rng = np.random.RandomState(23455)

    # Computing number of mini-batches                                                                           
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= mini_batch_size
    n_test_batches //= mini_batch_size

    # mini-batch index                                                                                           
    mb_index = T.lscalar()
    # rasterised images                                                                                          
    x = T.matrix('x')
    # image labels                                                                                               
    y = T.ivector('y')

    # Reshaping matrix of mini_batch_size set of images into a                                                   
    # 4-D tensor of dimensions: mini_batch_size x 1 x 28 x 28                                                    
    layer0_input = x.reshape((mini_batch_size, 1, 28, 28))

    # First convolution and pooling layer                                                                        
    # 4D output tensor is of shape:                                                                              
    # mini_batch_size x 9 x 12 x 12                                                                              
    [layer0_output, layer0_params] = cnn.convLayer(
        rng,
        data_input=layer0_input,
        image_spec=(mini_batch_size, 1, 28, 28),
        filter_spec=(num_filters[0], 1, 5, 5),
        pool_size=(2, 2),
        activation=T.tanh)
    
    # Flatten the output into dimensions:
    # mini_batch_size x 1296                                                                                     
    fc_layer_input = layer0_output.flatten(2)

    # The fully connected layer operates on a matrix of                                                          
    # dimensions: mini_batch_size x 1296                                                                         
    # Its linear output is classified with the softmax function below.
    [y_lin, fc_layer_params] = cnn.fullyConnectedLayer(
        rng,
        data_input=fc_layer_input,
        num_in=num_filters[0]*12*12,
        num_out=10)

    # The likelihood of the categories                                                                           
    p_y_given_x = T.nnet.softmax(y_lin)
    # Predictions                                                                                                
    y_pred =  T.argmax(p_y_given_x,axis=1)

    # Cost that is minimised during stochastic descent.                                                          
    cost = negative_log_lik(y=y, p_y_given_x=p_y_given_x)

    # Create a Theano function that computes the mistakes on the test set.                                       
    # This evaluates the model's error rate.
    test_model = theano.function(
        [mb_index],
        errors(y, y_pred),
        givens={
            x: test_set_x[
                mb_index * mini_batch_size:
                    (mb_index + 1) * mini_batch_size
                ],
            y: test_set_y[
                mb_index * mini_batch_size:
                    (mb_index + 1) * mini_batch_size
                ]})

    # List of parameters to be fit during training                                                               
    params = fc_layer_params + layer0_params

    updates = cnn.gradient_updates_Adam(cost,params,learning_rate)

    # Create a Theano function to train our convolutional neural network.                                        
    train_model = theano.function(
        [mb_index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[
                mb_index * mini_batch_size:
                    (mb_index + 1) * mini_batch_size
                ],
            y: train_set_y[
                mb_index * mini_batch_size:
                    (mb_index + 1) * mini_batch_size
                ]})

    iter = 0
    epoch = 0
    cost_ij = 0

    # This is where we call the previously defined Theano functions.                                             
    while (epoch < num_epochs):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            cost_ij = train_model(minibatch_index)

    test_losses = [test_model(i) for i in range(n_test_batches)]
    test_score = np.mean(test_losses)
    assert test_score < 0.08
Ejemplo n.º 14
0
    if datatype == 'line':
        data_path = '../synthetic_data/line_data.save'
    elif datatype == 'circle':
        data_path = '../synthetic_data/circle_data.save'
    elif datatype == 'spiral':
        data_path = '../synthetic_data/spiral_data.save'
    print 'opening data'
    f = open(data_path, 'rb')
    train_data, valid_data, test_data = pickle.load(f)
    f.close()

    N, D = train_data[0].shape
    Nv = valid_data[0].shape[0]

    numpy_rng = numpy.random.RandomState()
    train_set = ld.shared_dataset(train_data)
    test_set = ld.shared_dataset(test_data)
    valid_set = ld.shared_dataset(valid_data)

    save = []
    sym_test_eucs = []
    sym_test_angs = []
    logs = []
    print '---Trained : ' + model_type + '---Noise Type: ' + noise_type
    for tmp in [1]:
        #for num_epoch in [200]:
        #for corrupt in [0.1,0.2]:
        #for epsilon in [0.01,0.06, 0.03, 0.001]:
        #for epsilon in [0.01,0.05, 0.03]:
        #for epsilon in [0.005, 0.003]:
        #for lam in [0.0005, 0.001, 0.005]:
Ejemplo n.º 15
0
def train_by_lenet5(tr_start_index,
                    tr_limit,
                    vl_start_index,
                    vl_limit,
                    output_filename="tmp.file",
                    learning_rate=0.13,
                    n_epochs=5000):

    """
    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    global train_dataset_route
    global valid_dataset_route

    output_file = open(output_filename, 'w')

    print train_dataset_route, type(train_dataset_route)

    train_set = tdtf.read_data_patch_to_ndarray(train_dataset_route,
                                                tr_start_index, tr_limit)
    datasets = load_data.shared_dataset(train_set)
    train_set_x, train_set_y = datasets

    valid_set = tdtf.read_data_patch_to_ndarray(valid_dataset_route,
                                                vl_start_index, vl_limit)
    print valid_set[1]
    datasets = load_data.shared_dataset(valid_set)
    valid_set_x, valid_set_y = datasets

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size

    # allocate symbolic variables for the data

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
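
    # NOTE: the original snippet omits the code that allocates the symbolic
    # variables (index, x, y) and builds layer0..layer3, as well as the
    # batch_size and nkerns settings used below. A minimal sketch, assuming
    # the same 50x50 LeNet-style architecture used in the prediction function
    # above (the exact filter sizes and constructor defaults are assumptions):
    #
    # index = T.lscalar()            # index to a [mini]batch
    # x = T.matrix('x')              # rasterized input images
    # y = T.ivector('y')             # integer class labels
    # batch_size = 500
    # nkerns = [20, 50]
    # rng = numpy.random.RandomState(23455)
    #
    # layer0_input = x.reshape((batch_size, 1, 50, 50))
    # layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
    #                             image_shape=(batch_size, 1, 50, 50),
    #                             filter_shape=(nkerns[0], 1, 10, 10),
    #                             poolsize=(2, 2))
    # layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
    #                             image_shape=(batch_size, nkerns[0], 20, 20),
    #                             filter_shape=(nkerns[1], nkerns[0], 5, 5),
    #                             poolsize=(2, 2))
    # layer2 = HiddenLayer(rng, input=layer1.output.flatten(2),
    #                      n_in=nkerns[1] * 8 * 8, n_out=100,
    #                      activation=T.tanh)
    # layer3 = LogisticRegression(input=layer2.output, n_in=100, n_out=2)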

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        [index], [cost, layer3.errors(y), layer3.params[0][0][0]],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 50000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch
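    # In short: training stops once iter exceeds patience; whenever the
    # validation loss drops below improvement_threshold times the previous
    # best, patience is extended to at least iter * patience_increase.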

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    min_train_cost = 10000
    decreasing_num = 0

    last_train_err = 1
    last_train_cost = 1

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter, ' patience = ', patience
            cost_ij, train_err, par = train_model(minibatch_index)

            decreasing_rate = (last_train_err -
                               train_err) / (last_train_err) * 100.
            last_train_err = train_err
            if last_train_err == 0:
                last_train_err += 0.0000001
            c_d_rate = (last_train_cost - cost_ij) / (last_train_cost) * 100.
            last_train_cost = cost_ij
            print >> output_file, ('epoch %i, minibatch %i/%i, train_cost %f, '
                'train_error %.2f %%, decreasing rate %f %%, '
                'cost_decreasing rate %f %%, W00 %s' %
                (epoch, minibatch_index + 1, n_train_batches,
                 cost_ij, train_err * 100., decreasing_rate,
                 c_d_rate, str(par)))

            #print layer1.params[0:1][0][0:3]
            #print layer2.params[0:1][0][0:3]
            if cost_ij < min_train_cost:
                decreasing_num = 0
                min_train_cost = cost_ij
                layer0_state = layer0.__getstate__()
                layer1_state = layer1.__getstate__()
                layer2_state = layer2.__getstate__()
                layer3_state = layer3.__getstate__()
                trained_model_list = [
                    layer0_state, layer1_state, layer2_state, layer3_state
                ]
                trained_model_array = numpy.asarray(trained_model_list)
                classifier_file = open(train_model_route, 'wb')
                cPickle.dump([1, 2, 3], classifier_file, protocol=2)
                numpy.save(classifier_file, trained_model_array)
                classifier_file.close()
            else:
                print "decreasing"
                decreasing_num += 1
                if decreasing_num > 100:
                    done_looping = True
                    break
            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print >> output_file, ('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
            if patience <= iter:
                done_looping = True
                print patience, iter
                break

    end_time = time.clock()
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    print >> output_file, ('Optimization complete.')
    print >> output_file, ('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))

    output_file.close()
Ejemplo n.º 16
0
def sgd_optimization_mnist(tr_start_index=1, tr_limit=5000, vl_start_index=1,
                           vl_limit=5000, learning_rate=0.015, n_epochs=5000,
                           output_filename="ls.out"):

    output_file = open(output_filename,'w')
    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                               # [int] labels
    in_shape = layer0_input_shape[0] * layer0_input_shape[1]

    batch_size = tr_limit
    train_set = tdtf.read_data_patch_to_ndarray(train_dataset_route, tr_start_index, tr_limit)
    datasets = load_data.shared_dataset(train_set)
    train_set_x, train_set_y = datasets

    valid_set = tdtf.read_data_patch_to_ndarray(valid_dataset_route, vl_start_index, vl_limit)
    print valid_set[1]
    datasets = load_data.shared_dataset(valid_set)
    valid_set_x, valid_set_y = datasets

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    #n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    if if_load_trained_model:
        trained_model_pkl = open(train_model_route, 'rb')
        trained_model_state_list = cPickle.load(trained_model_pkl)
        trained_model_state_array = numpy.load(trained_model_pkl)
        classifier_state = trained_model_state_array[0]

        classifier = LogisticRegression(input=x, n_in=in_shape, n_out=layer0_output_shape
                                        , W=classifier_state[0], b=classifier_state[1])

    else:

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        #print '... building the model'

        # construct the logistic regression class
        rng = numpy.random.RandomState(23555)
        W_bound=1
        tmp_W = theano.shared(numpy.asarray(
                rng.uniform(low=0, high=W_bound, size=(in_shape, layer0_output_shape)), dtype=theano.config.floatX),
                borrow=True)
        classifier = LogisticRegression(input=x, n_in=in_shape, n_out=layer0_output_shape)
                                    #,W=tmp_W)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(inputs=[index], \
            outputs=[cost, classifier.errors(y)], \
            updates=updates, \
            givens={ \
                x: train_set_x[index * batch_size:(index + 1) * batch_size], \
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    #print '... training the model'
    # early-stopping parameters
    patience = 50000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                                  # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                  # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()
    best_train_loss = numpy.inf

    done_looping = False
    epoch = 0

    last_train_err = 1
    last_train_cost = 1

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost, train_err = train_model(minibatch_index)
            decreasing_rate = (last_train_err - train_err) / (last_train_err) * 100.
            last_train_err = train_err
            if last_train_err == 0:
                # guard against division by zero on the next iteration
                last_train_err += 0.0000001
            c_d_rate = (last_train_cost - minibatch_avg_cost) / (last_train_cost) * 100.
            last_train_cost = minibatch_avg_cost
            print >> output_file, ('epoch %i, minibatch %i/%i, train_cost %f, '
                'train_error %.2f %%, decreasing rate %f %%, '
                'cost_decreasing rate %f %%' %
                (epoch, minibatch_index + 1, n_train_batches,
                 minibatch_avg_cost, train_err * 100.,
                 decreasing_rate, c_d_rate))

            if best_train_loss > train_err:
                best_train_loss = train_err

            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print >> output_file, ('epoch %i, minibatch %i/%i, validation error %f %%' % \
                    (epoch, minibatch_index + 1, n_train_batches,
                    this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss

                    # optionally save the trained model state here (disabled):
                    '''
                    layer_state = classifier.__getstate__()
                    trained_model_list = [layer_state]
                    trained_model_array = numpy.asarray(trained_model_list)
                    classifier_file = open(train_model_route, 'w')
                    cPickle.dump([1,2,3], classifier_file, protocol=2)
                    numpy.save(classifier_file, trained_model_array)
                    classifier_file.close()
                    '''
                    '''
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    test_res = [test_results(i)
                                   for i in xrange(n_test_batches)]

                    print(('     epoch %i, minibatch %i/%i, test error of best'
                       ' model %f %%') %
                        (epoch, minibatch_index + 1, n_train_batches,
                         test_score * 100.))
                     '''

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print >> output_file, (('Optimization complete with best validation score of %f %%, '
           'with test performance %f %% '
           'and best train performance %f %%') %
                 (best_validation_loss * 100., test_score * 100., best_train_loss * 100.))
    print >> output_file, 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
    output_file.close()