    def __init__(self, batch_size, num_kernels, kernel_sizes, channel, x, rng):
        # x (the symbolic input placeholder) and rng (the RandomState) are used
        # below, so they are taken as constructor arguments to keep the snippet
        # self-contained.
        self.layer0_input_size = (batch_size, 1, 100, 100)  # fixed size from the data
        self.edge0 = (100 - kernel_sizes[0][0] + 1) // 2
        self.layer0_output_size = (batch_size, num_kernels[0], self.edge0, self.edge0)
        # check that the convolution output divides evenly by 2 before pooling
        assert ((100 - kernel_sizes[0][0] + 1) % 2) == 0

        # The actual input is the placeholder x reshaped to the input size of the network
        self.layer0_input = x[channel].reshape(self.layer0_input_size)
        self.layer0 = LeNetConvPoolLayer(rng,
                                         input=self.layer0_input,
                                         image_shape=self.layer0_input_size,
                                         subsample=(1, 1),
                                         filter_shape=(num_kernels[0], 1) + kernel_sizes[0],
                                         poolsize=(2, 2))
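        # Note: filter_shape is built by tuple concatenation, e.g.
        # (num_kernels[0], 1) + (9, 9) == (num_kernels[0], 1, 9, 9), i.e.
        # (n_filters, n_input_channels, filter_height, filter_width).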


        # ## Layer 1 - Second convolutional Layer
        # The second layer takes the first layer's output as input, convolves it with **`num_kernels[1]`** different filters of size **`kernel_sizes[1]`**, and then downsamples (via maxpooling) in a **2x2** region.  Each filter/maxpool combination produces an output of size **`edge1 = (edge0 - kernel_sizes[1][0] + 1)/2`** on a side.
        # The size of the second layer's output is therefore **`(batch_size, num_kernels[1], edge1, edge1)`**.
        self.layer1_input_size = self.layer0_output_size
        self.edge1 = (self.edge0 - kernel_sizes[1][0] + 1) // 2
        self.layer1_output_size = (batch_size, num_kernels[1], self.edge1, self.edge1)

        # check that the convolution output divides evenly by 2 before pooling
        assert ((self.edge0 - kernel_sizes[1][0] + 1) % 2) == 0

        self.layer1 = LeNetConvPoolLayer(rng,
                                         input=self.layer0.output,
                                         image_shape=self.layer1_input_size,
                                         subsample=(1, 1),
                                         filter_shape=(num_kernels[1], num_kernels[0]) + kernel_sizes[1],
                                         poolsize=(2, 2))
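# A minimal helper (a sketch, not part of the original class) that mirrors the
# edge-size arithmetic used above: a 'valid' convolution followed by
# non-overlapping max pooling.
def conv_pool_edge(input_edge, kernel_edge, pool_edge=2):
    conv_edge = input_edge - kernel_edge + 1
    assert conv_edge % pool_edge == 0, "conv output must divide evenly before pooling"
    return conv_edge // pool_edge

# With the 100x100 input and a 9x9 kernel: (100 - 9 + 1) // 2 == 46
assert conv_pool_edge(100, 9) == 46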
Example No. 2
    def __init__(self, batch_size, num_kernels, kernel_sizes, channel, x, y):
        self.layer0_input_size = (batch_size, 1, 100, 100)  # Input size from data
        self.edge0 = (100 - kernel_sizes[0][0] + 1) // 3  # New edge size after 3x3 pooling
        self.layer0_output_size = (batch_size, num_kernels[0], self.edge0,
                                   self.edge0)  # Output size
        # Check that the convolution output divides evenly by 3 before pooling
        assert ((100 - kernel_sizes[0][0] + 1) % 3) == 0

        # Initialize Layer 0
        self.layer0_input = x.reshape(self.layer0_input_size)
        self.layer0 = LeNetConvPoolLayer(rng,
                                         input=self.layer0_input,
                                         image_shape=self.layer0_input_size,
                                         subsample=(1, 1),
                                         filter_shape=(num_kernels[0], 1) +
                                         kernel_sizes[0],
                                         poolsize=(3, 3))

        self.layer1_input_size = self.layer0_output_size  # Input size Layer 1
        self.edge1 = (self.edge0 - kernel_sizes[1][0] + 1) // 2  # New edge size
        self.layer1_output_size = (batch_size, num_kernels[1], self.edge1,
                                   self.edge1)  # Output size
        assert ((self.edge0 - kernel_sizes[1][0] + 1) %
                2) == 0  # Check pooling size

        # Initialize Layer 1
        self.layer1 = LeNetConvPoolLayer(
            rng,
            input=self.layer0.output,
            image_shape=self.layer1_input_size,
            subsample=(1, 1),
            filter_shape=(num_kernels[1], num_kernels[0]) + kernel_sizes[1],
            poolsize=(2, 2))
Example No. 3
def load_trained_model():
    global if_load_trained_model
    global train_model_route
    global layer0_input
    global layer0
    global layer1
    global layer2_input
    global layer2
    global layer3
    global test_results

    if_load_trained_model = 1
    print "loading trained model for the first time"
    trained_model_pkl = open(train_model_route, 'rb')
    trained_model_state_list = cPickle.load(trained_model_pkl)
    trained_model_state_array = numpy.load(trained_model_pkl)
    layer0_state, layer1_state, layer2_state, layer3_state = trained_model_state_array

    ishape = (50, 50)  # this is the size of the input images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    layer0_input = x.reshape((batch_size, 1, 50, 50))
    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (50-10+1, 50-10+1) = (41, 41)
    # maxpooling reduces this further to (41//2, 41//2) = (20, 20)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 20, 20)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input, \
            image_shape=(batch_size, 1, 50, 50), \
            filter_shape=(nkerns[0], 1, 10, 10), poolsize=(2, 2), \
            W=layer0_state[0], b=layer0_state[1] \
            )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (20-5+1, 20-5+1) = (16, 16)
    # maxpooling reduces this further to (16/2, 16/2) = (8, 8)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 8, 8)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, nkerns[0], 20, 20),
            filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2), \
            W=layer1_state[0], b=layer1_state[1] \
            )

    layer2_input = layer1.output.flatten(2)
    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 8 * 8,
                         n_out=100, activation=T.tanh,\
                         W=layer2_state[0], b=layer2_state[1] \
                         )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=100, n_out=3, \
                                    W=layer3_state[0], b=layer3_state[1] \
                                )
    test_results = theano.function(inputs=[x], \
        outputs= layer3.y_pred)
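# Hedged usage sketch (not part of the original): once load_trained_model()
# has run, the global test_results maps a batch of rasterized 50x50 images
# (one image per row, batch_size rows) to predicted class labels.
def predict_batch(images):
    # images: float32 array of shape (batch_size, 50 * 50)
    return test_results(images)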
Example No. 4
def convLayer0(input2, nkerns=[20, 50]):
    rng = numpy.random.RandomState(23455)
    x = T.matrix('x')  # the data is presented as rasterized images
    layer0_input = x.reshape((1, 1, 50, 50))
    print(type(layer0_input))
    print(layer0_input.ndim)

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (50-5+1, 50-5+1) = (46, 46)
    # maxpooling reduces this further to (46/2, 46/2) = (23, 23)
    # 4D output tensor is thus of shape (1, nkerns[0], 23, 23)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(1, 1, IMAGE_WIDTH, IMAGE_HEIGHT),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    f = theano.function([x], layer0.output)
    print(type(input2))
    output = f(input2)
    print(output.shape)
    for k in range(20):
        for i in range(23):
            for j in range(23):
                output[0][k][i][j] = output[0][k][i][j] * 256
    for i in range(20):
        cv2.imwrite(Constants.IMG_DIR_TENCENT_SPLIT + str(i) + "111.jpg",
                    output[0][i])
    for i in range(22):
        for j in range(22):
            print(output[0][0][i][j])
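    # A vectorized equivalent of the scaling loop above (a sketch, assuming
    # `output` holds values in [0, 1]); numpy broadcasting would replace the
    # triple Python loop:
    #     output[0, :20, :23, :23] *= 256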
Example No. 5
f = open('model.dat', 'rb')
params = cPickle.load(f)
f.close()

input = T.matrix('input')
label = T.ivector('label')
nkerns = [20, 50]
rng = np.random.RandomState(3510)
batch_size = 1

layer0_input = input.reshape((batch_size, 1, 50, 50))
layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=(batch_size, 1, 50, 50),
    filter_shape=(nkerns[0], 1, 5, 5),
    poolsize=(2, 2),
    stride=(3, 3),
    W=params[6].get_value(),
    b=params[7].get_value(),
)

layer1 = LeNetConvPoolLayer(
    rng,
    input=layer0.output,
    image_shape=(batch_size, nkerns[0], 8, 8),
    filter_shape=(nkerns[1], nkerns[0], 4, 4),
    poolsize=(1, 1),
    stride=(1, 1),
    W=params[4].get_value(),
    b=params[5].get_value(),
)
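# Quick sanity check (a sketch, not in the original): the indexing above
# assumes `params` stores the trained weights in reverse layer order, so it
# can help to print each entry's shape before wiring it into a layer.
for idx, p in enumerate(params):
    print(idx, p.get_value().shape)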
Example No. 6
def main_ver1_sqeu_2(learning_rate=0.05, weight_decay=0.001, n_epochs=200, nkerns=[20, 30], batch_size=500):

    name = 'Sequence_'

    rng = numpy.random.RandomState(23455)

    datasets = loaddata_mnist()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train = train_set_x.get_value(borrow=True).shape[0]
    n_valid = valid_set_x.get_value(borrow=True).shape[0]
    n_test = test_set_x.get_value(borrow=True).shape[0]

    # print(str(n_train), str(n_valid),str(n_test))

    test_set_x = test_set_x.reshape((n_test, 1, 28, 28))
    valid_set_x = valid_set_x.reshape((n_valid, 1, 28, 28))
    train_set_x = train_set_x.reshape((n_train, 1, 28, 28))

    n_train_batches = n_train // batch_size
    n_valid_batches = n_valid // batch_size
    n_test_batches = n_test // batch_size

    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar()

    print('... loading the model')

    layer0_input = x.reshape((batch_size, 1, 28, 28))

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression_nonzeroini(rng, input=layer2.output, n_in=500, n_out=10)

    cost = layer3.negative_log_likelihood(y)

    params = layer3.params + layer2.params + layer1.params + layer0.params

    grads = T.grad(cost, params)

    updates = [
            (param_i, param_i - learning_rate * grad_i)# + weight_decay * param_i)
            for param_i, grad_i in zip(params, grads)]
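    # Note: the weight_decay term is commented out above, so these updates are
    # plain SGD without L2 regularization: param <- param - learning_rate * grad.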

    patience_increase = 4
    improvement_threshold = 0.00001

    start_time = timeit.default_timer()

    print('... training')

    temp_time_1 = timeit.default_timer()
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    patience = 200000
    validation_frequency = min(n_train_batches, patience // 2)
    epoch = 0
    done_looping = False
    error_line = numpy.zeros(n_epochs)

    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            layer0.input: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            layer0.input: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            layer0.input: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss*100))
                error_line[epoch-1] = this_validation_loss

                if this_validation_loss < best_validation_loss:

                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score*100))

                    [t_layer0, t_layer1, t_layer2_input, t_layer2, t_layer3] = \
                        [layer0, layer1, layer2_input, layer2, layer3]

                temp_model = [layer0, layer1, layer2_input, layer2, layer3]
                with open(name + str(epoch) + '.pkl', 'wb') as f:
                    pickle.dump(temp_model, f)

            if patience <= iter:
                done_looping = True
                break

    with open(name + 'final.pkl', 'wb') as f:
        pickle.dump([t_layer0, t_layer1, t_layer2_input, t_layer2, t_layer3], f)

    error_line = error_line[0:epoch-1]/100

    scipy.io.savemat('Sqeuence.mat', mdict={'Error_Spectrum': error_line})

    temp_time_2 = timeit.default_timer()
    print('%.2fm' % ((temp_time_2 - temp_time_1) / 60.))

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f  obtained at iteration %i, '
          'with test performance %f ' %
          (best_validation_loss, best_iter + 1, test_score))
    print('The code for file ran for %.2fm' % ((end_time - start_time) / 60.))
Example No. 7
def random_epoch_train_begining(learning_rate=0.05,
                                weight_decay=0.001,
                                nkerns=[20, 50],
                                n_epochs=200,
                                batch_size=500,
                                dataset='mnist.pkl.gz',
                                name_given='test'):

    #name = 'FashionMnist_'+str(learning_rate)+'_'+str(weight_decay) + '_' + str(nkerns) + 'Rand_Trans_Relu2_Begin'
    name = name_given
    rng = numpy.random.RandomState(23455)
    datasets = loaddata_mnist(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train = train_set_x.get_value(borrow=True).shape[0]
    n_valid = valid_set_x.get_value(borrow=True).shape[0]
    n_test = test_set_x.get_value(borrow=True).shape[0]

    test_set_x = test_set_x.reshape((n_test, 1, 28, 28))
    valid_set_x = valid_set_x.reshape((n_valid, 1, 28, 28))
    train_set_x = train_set_x.reshape((n_train, 1, 28, 28))

    temp_train_set_x = theano.shared(numpy.zeros(train_set_x.shape.eval(),
                                                 dtype=theano.config.floatX),
                                     borrow=True)
    temp_train_set_xx = T.Rebroadcast((1, True))(temp_train_set_x)

    temp_valid_set_x = theano.shared(numpy.zeros(valid_set_x.shape.eval(),
                                                 dtype=theano.config.floatX),
                                     borrow=True)
    temp_valid_set_xx = T.Rebroadcast((1, True))(temp_valid_set_x)

    temp_test_set_x = theano.shared(numpy.zeros(test_set_x.shape.eval(),
                                                dtype=theano.config.floatX),
                                    borrow=True)
    temp_test_set_xx = T.Rebroadcast((1, True))(temp_test_set_x)

    n_train_batches = n_train // batch_size
    n_valid_batches = n_valid // batch_size
    n_test_batches = n_test // batch_size

    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar()
    dummy = T.ftensor4('dummy')

    update_train = (temp_train_set_x, dummy)
    update_valid = (temp_valid_set_x, dummy)
    update_test = (temp_test_set_x, dummy)

    replace_train = theano.function([dummy],
                                    temp_train_set_x,
                                    updates=[update_train])
    replace_valid = theano.function([dummy],
                                    temp_valid_set_x,
                                    updates=[update_valid])
    replace_test = theano.function([dummy],
                                   temp_test_set_x,
                                   updates=[update_test])
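    # replace_train/valid/test copy a freshly translated version of each
    # dataset into the shared temp_*_set_x buffers; they are called once per
    # epoch in the training loop below.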

    print('... loading the model')

    layer0_input = x.reshape((batch_size, 1, 28, 28))

    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    cost = layer3.negative_log_likelihood(y)

    params = layer3.params + layer2.params + layer1.params + layer0.params

    grads = T.grad(cost, params)

    updates = [(param_i,
                param_i - learning_rate * (grad_i + weight_decay * param_i))
               for param_i, grad_i in zip(params, grads)]
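    # These updates implement SGD with L2 weight decay:
    # param <- param - learning_rate * (grad + weight_decay * param).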

    patience_increase = 2
    improvement_threshold = 0.995

    start_time = timeit.default_timer()

    rand_trans_x = numpy.random.random_integers(-10, 10, 200)
    rand_trans_y = numpy.random.random_integers(-10, 10, 200)
    numpy.save('rand_trans_x.npy', rand_trans_x)
    numpy.save('rand_trans_y.npy', rand_trans_y)
    error_line = numpy.zeros(n_epochs)

    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            layer0.input: temp_test_set_xx[index * 500:(index + 1) * 500],
            y: test_set_y[index * 500:(index + 1) * 500]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            layer0.input: temp_valid_set_xx[index * 500:(index + 1) * 500],
            y: valid_set_y[index * 500:(index + 1) * 500]
        })

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            layer0.input: temp_train_set_xx[index * 500:(index + 1) * 500],
            y: train_set_y[index * 500:(index + 1) * 500]
        })

    print('... training')

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    patience = 20000
    validation_frequency = min(n_train_batches, patience // 2)
    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):

        horizontal = rand_trans_x[epoch]
        vertical = rand_trans_y[epoch]

        tran_test_set_x = theano_translation_updating(test_set_x, horizontal,
                                                      vertical).reshape(
                                                          (-1, 1, 28, 28))
        tran_valid_set_x = theano_translation_updating(valid_set_x, horizontal,
                                                       vertical).reshape(
                                                           (-1, 1, 28, 28))
        tran_train_set_x = theano_translation_updating(train_set_x, horizontal,
                                                       vertical).reshape(
                                                           (-1, 1, 28, 28))

        replace_test(tran_test_set_x)
        replace_valid(tran_valid_set_x)
        replace_train(tran_train_set_x)

        epoch = epoch + 1

        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('Horizontal Shift:', horizontal, 'Vertical Shift:',
                      vertical)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                error_line[epoch - 1] = this_validation_loss

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    [t_layer0, t_layer1, t_layer2_input, t_layer2, t_layer3] = \
        [layer0, layer1, layer2_input, layer2, layer3]

    with open(name + '.pkl', 'wb') as f:
        pickle.dump([t_layer0, t_layer1, t_layer2_input, t_layer2, t_layer3],
                    f)

    error_line = error_line[0:epoch - 1] * 100
    scipy.io.savemat(name + '.mat', mdict={'Error_Spectrum': error_line})

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The code for file ran for %.2fm' % ((end_time - start_time) / 60.))
def run():

    preProcess = PreProcess(load_in=True)
    data = preProcess.run()

    train_set_x, train_set_y = data[0], data[3]
    valid_set_x, valid_set_y = data[1], data[4]
    test_set_x, test_set_y = data[2], data[5]

    # network parameters
    num_kernels = [10, 10]
    kernel_sizes = [(9, 9), (5, 5)]
    #exit()
    sigmoidal_output_size = 20

    # training parameters
    learning_rate = 0.1
    batch_size = 50

    # Setup 2: compute batch sizes for train/test/validation
    # borrow=True gets us the value of the variable without making a copy.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_test_batches /= batch_size
    n_valid_batches /= batch_size

    # Setup 3.
    # Declare inputs to network - x and y are placeholders
    # that will be used in the training/testing/validation functions below.
    x = T.matrix('x')  # input image data
    y = T.ivector('y')  # input label data

    # ## Layer 0 - First convolutional Layer
    # The first layer takes **`(batch_size, 1, 100, 100)`** as input, convolves it with **10** different **9x9** filters, and then downsamples (via maxpooling) in a **2x2** region.  Each filter/maxpool combination produces an output of size **`(100-9+1)/2 = 46`** on a side.
    # The size of the first layer's output is therefore **`(batch_size, 10, 46, 46)`**.

    layer0_input_size = (batch_size, 1, 100, 100)  # fixed size from the data
    edge0 = (100 - kernel_sizes[0][0] + 1) / 2
    layer0_output_size = (batch_size, num_kernels[0], edge0, edge0)
    # check that we have an even multiple of 2 before pooling
    assert ((100 - kernel_sizes[0][0] + 1) % 2) == 0

    # The actual input is the placeholder x reshaped to the input size of the network
    layer0_input = x.reshape(layer0_input_size)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=layer0_input_size,
                                filter_shape=(num_kernels[0], 1) +
                                kernel_sizes[0],
                                poolsize=(2, 2))

    # ## Layer 1 - Second convolutional Layer
    # The second layer takes **`(batch_size, 10, 46, 46)`** as input, convolves it with 10 different **10x5x5** filters, and then downsamples (via maxpooling) in a **2x2** region.  Each filter/maxpool combination produces an output of size **`(46-5+1)/2 = 21`** on a side.
    # The size of the second layer's output is therefore **`(batch_size, 10, 21, 21)`**.
    layer1_input_size = layer0_output_size
    edge1 = (edge0 - kernel_sizes[1][0] + 1) / 2
    layer1_output_size = (batch_size, num_kernels[1], edge1, edge1)
    # check that we have an even multiple of 2 before pooling
    assert ((edge0 - kernel_sizes[1][0] + 1) % 2) == 0

    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=layer1_input_size,
                                filter_shape=(num_kernels[1], num_kernels[0]) +
                                kernel_sizes[1],
                                poolsize=(2, 2))

    # ## Layer 2 - Fully connected sigmoidal layer
    #exit()
    # The sigmoidal layer takes a vector as input.
    # We flatten all but the first dimension, to get an input of size **`(batch_size, 10 * 21 * 21)`**.

    #raw_random= raw_random.RandomStreamsBase()
    srng = theano.tensor.shared_randomstreams.RandomStreams(
        rng.randint(999999))

    #def rectify(X):
    #    return T.maximum(X,0.)

    def dropout(X, p=0.5):
        if p > 0:
            retain_prob = 1 - p
            X *= srng.binomial(X.shape,
                               p=retain_prob,
                               dtype=theano.config.floatX)
            X /= retain_prob
        return X
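    # Note: this is "inverted" dropout -- activations kept with probability
    # (1 - p) are rescaled by 1 / (1 - p) during training, so the expected
    # activation is unchanged and no rescaling is needed at test time.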

    layer2_input = layer1.output.flatten(2)

    layer2 = HiddenLayer(rng,
                         input=dropout(layer2_input),
                         n_in=num_kernels[1] * edge1 * edge1,
                         n_out=sigmoidal_output_size,
                         activation=T.tanh)

    # ## Layer 3 - Logistic regression output layer
    # A fully connected logistic regression layer converts the sigmoid's layer output to a class label.
    layer3 = LogisticRegression(input=layer2.output,
                                n_in=sigmoidal_output_size,
                                n_out=sport_n)

    # # Training the network
    # To train the network, we have to define a cost function.  We'll use the Negative Log Likelihood of the model, relative to the true labels **`y`**.

    # The cost we minimize during training is the NLL of the model.
    # Recall: y is a placeholder we defined above
    cost = layer3.negative_log_likelihood(y)

    # ### Gradient descent
    # We will train with Stochastic Gradient Descent.  To do so, we need the gradient of the cost relative to the parameters of the model.  We can get the parameters for each label via the **`.params`** attribute.

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # ## Update
    updates = [
        (param_i, param_i - learning_rate * grad_i)  # <=== SGD update step
        for param_i, grad_i in zip(params, grads)
    ]

    index = T.lscalar()  # index to a batch of training/validation/testing examples

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) *
                           batch_size],  # <=== batching
            y: train_set_y[index * batch_size:(index + 1) *
                           batch_size]  # <=== batching
        })

    # ## Validation function
    # To track progress on a held-out set, we count the number of misclassified examples in the validation set.
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # ## Test function
    # After training, we check the number of misclassified examples in the test set.
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # # Training loop
    # We use SGD for a fixed number of iterations over the full training set (an "epoch").  Usually, we'd use a more complicated rule, such as iterating until a certain number of epochs fail to produce improvement in the validation set.
    for epoch in range(30):
        costs = [train_model(i) for i in xrange(n_train_batches)]
        validation_losses = [
            validate_model(i) for i in xrange(n_valid_batches)
        ]
        print "Epoch {}    NLL {:.2}    %err in validation set {:.1%}".format(
            epoch + 1, np.mean(costs), np.mean(validation_losses))

    # ## Learned features
    #filters = tile_raster_images(layer0.W.get_value(borrow=True), img_shape=(9, 9), tile_shape=(1,10), tile_spacing=(3, 3),
    #                       scale_rows_to_unit_interval=True,
    #                       output_pixel_vals=True)

    #plt.imshow(filters)
    #plt.show()

    # ## Check performance on the test set
    test_errors = [test_model(i) for i in range(n_test_batches)]
    print "test errors: {:.1%}".format(np.mean(test_errors))
Example No. 9
input = T.matrix('input')
label = T.ivector('label')
batch_size = 100
nkerns = [10, 25]

layer0_input = input.reshape((batch_size, 1, 50, 50))

# layer 0: convolution
# input: batch_size * 1 * 50 * 50
# conv width: (50 - 5)/3 + 1 = 16
# pool width: 16 / 2 = 8
# output: batch_size * nkerns[0] * 8 * 8
layer0 = LeNetConvPoolLayer(rng,
                            input=layer0_input,
                            image_shape=(batch_size, 1, 50, 50),
                            filter_shape=(nkerns[0], 1, 5, 5),
                            poolsize=(2, 2),
                            stride=(3, 3))

# layer 1: convolution
# input: batch_size * nkerns[0] * 8 * 8
# conv width: (8 - 4) + 1 = 5
# pool width: 5 / 1 = 5
# output: batch_size * nkerns[1] * 5 * 5
layer1 = LeNetConvPoolLayer(rng,
                            input=layer0.output,
                            image_shape=(batch_size, nkerns[0], 8, 8),
                            filter_shape=(nkerns[1], nkerns[0], 4, 4),
                            poolsize=(1, 1),
                            stride=(1, 1))
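# Minimal sketch (not in the original) of the strided-conv / pool size
# arithmetic used in the comments above: a 'valid' convolution with the given
# stride, followed by non-overlapping pooling with floor division.
def strided_conv_pool_width(in_w, filt_w, stride, pool_w):
    conv_w = (in_w - filt_w) // stride + 1
    return conv_w // pool_w

# (50 - 5) // 3 + 1 == 16 and 16 // 2 == 8, matching layer1's image_shape
assert strided_conv_pool_width(50, 5, 3, 2) == 8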
Example No. 10
def evaluate_lenet5(learning_rate, n_epochs, nkerns, batch_size):
    """
    Demonstrates lenet on a small sample of the cacophony dataset
    using a network consisting of:
    - two (convolutional + max pool) layers
    - one fully connected hidden layer
    - logistic regression to determine the final class from the hidden layer outputs

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels in each layer

    Adapted from convolutional_mlp::evaluate_lenet5
    """

    filter_size = 5  # number of pixels across for the convolutional filter

    rng = numpy.random.RandomState(
        23455)  # Use this one for the same result each time
    # rng = numpy.random.RandomState()

    datasets = load_data()

    # Image list, classification list
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.vector('y', "int64")  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 48 * 64)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (48, 64) is the size of cacophony small images. (height, width)
    layer0_input = x.reshape(
        (batch_size, 1, IMAGE_HEIGHTS[0], IMAGE_WIDTHS[0]))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (48-5+1 , 64-5+1) = (44, 60)
    # maxpooling reduces this further to (44/2, 60/2) = (22, 30)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 22, 30)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, IMAGE_HEIGHTS[0], IMAGE_WIDTHS[0]),
        filter_shape=(nkerns[0], 1, FILTER_SIZES[0], FILTER_SIZES[0]),
        poolsize=(MAX_POOLING_SIZES[0], MAX_POOLING_SIZES[0]))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (22-5+1, 30-5+1) = (18, 26)
    # maxpooling reduces this further to (18/2, 26/2) = (9, 13)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 9, 13)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(
            batch_size, nkerns[0], IMAGE_HEIGHTS[1],
            IMAGE_WIDTHS[1]),  # previous layer generated 22*30 sized images
        filter_shape=(nkerns[1], nkerns[0], FILTER_SIZES[1], FILTER_SIZES[1]),
        poolsize=(MAX_POOLING_SIZES[1], MAX_POOLING_SIZES[1]))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 9 * 13),
    # or (1, 50 * 9 * 13) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * IMAGE_HEIGHTS[2] * IMAGE_WIDTHS[
            2],  # 9*13 is the number of pixels in the "image" from the previous layer
        n_out=batch_size,
        activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=batch_size,
                                n_out=5)  # n_out is the number of classes

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        for minibatch_index in range(n_train_batches):
            iterator = (epoch - 1) * n_train_batches + minibatch_index

            if iterator % 100 == 0:
                print('training @ iterator = ', iterator)
            cost_ij = train_model(minibatch_index)

            if (iterator + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iterator * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iterator

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iterator:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' %
         ((end_time - start_time) / 60.)),
        file=sys.stderr)

    display_output(test_set_x, batch_size, layer0, nkerns[0])

    # display the final filters for the convolutional layers
    display_conv_filters("Layer 0", layer0)
    display_conv_filters("Layer 1", layer1)
Example No. 11
    def __init__(self,
                 datasets,
                 nkerns=[32, 48],
                 batch_size=1000,
                 normalized_width=20,
                 distortion=0,
                 cuda_convnet=1,
                 params=[None, None, None, None, None, None, None, None]):
        """ Demonstrates Ciresan 2012 on MNIST dataset

        Some minor differences here:
        ---
        - Ciresan initializes Conv layers with: "uniform random distribution
            in the range [−0.05, 0.05]." (Ciresan IJCAI 2011)
        - Ciresan uses a sigma of 6
        - Ciresan uses nkerns=[20, 40] which were increased here to be nkerns=[32, 48]
            in order to be compatible with cuda_convnet

        :type learning_rate: float
        :param learning_rate: learning rate used (factor for the stochastic
                              gradient)

        :type n_epochs: int
        :param n_epochs: maximal number of epochs to run the optimizer

        :type dataset: string
        :param dataset: path to the dataset used for training /testing (MNIST here)

        :type nkerns: list of ints
        :param nkerns: number of kernels on each layer

        :type params: list of None or Numpy matricies/arrays
        :param params: W/b weights in the order: layer3W, layer3b, layer2W, layer2b, layer1W, layer1b, layer0W, layer0b
        """

        layer3W, layer3b, layer2W, layer2b, layer1W, layer1b, layer0W, layer0b = params
        rng = numpy.random.RandomState(23455)

        # TODO: could make this a theano sym variable to abstract
        # loaded data from column instantiation
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]

        # TODO: could move this to train method
        # compute number of minibatches for training, validation and testing
        self.n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        self.n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        self.n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        self.n_train_batches /= batch_size
        self.n_valid_batches /= batch_size
        self.n_test_batches /= batch_size

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
        learning_rate = T.fscalar()

        # start-snippet-1
        x = T.matrix('x')  # the data is presented as rasterized images
        y = T.ivector('y')  # the labels are presented as 1D vector of
        # [int] labels

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print '... building the column'

        if distortion:
            distortion_layer = ElasticLayer(x.reshape((batch_size, 29, 29)),
                                            29,
                                            magnitude=ALPHA,
                                            sigma=SIGMA)

            network_input = distortion_layer.output.reshape(
                (batch_size, 1, 29, 29))
        else:
            network_input = x.reshape((batch_size, 1, 29, 29))

        if cuda_convnet:
            layer0_input = network_input.dimshuffle(1, 2, 3, 0)
        else:
            layer0_input = network_input

        layer0_imageshape = (1, 29, 29,
                             batch_size) if cuda_convnet else (batch_size, 1,
                                                               29, 29)
        layer0_filtershape = (1, 4, 4,
                              nkerns[0]) if cuda_convnet else (nkerns[0], 1, 4,
                                                               4)

        layer0 = LeNetConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=layer0_imageshape,
                                    filter_shape=layer0_filtershape,
                                    poolsize=(2, 2),
                                    cuda_convnet=cuda_convnet,
                                    W=layer0W,
                                    b=layer0b)

        layer1_imageshape = (nkerns[0], 13, 13,
                             batch_size) if cuda_convnet else (batch_size,
                                                               nkerns[0], 13,
                                                               13)
        layer1_filtershape = (nkerns[0], 5, 5,
                              nkerns[1]) if cuda_convnet else (nkerns[1],
                                                               nkerns[0], 5, 5)

        layer1 = LeNetConvPoolLayer(rng,
                                    input=layer0.output,
                                    image_shape=layer1_imageshape,
                                    filter_shape=layer1_filtershape,
                                    poolsize=(3, 3),
                                    cuda_convnet=cuda_convnet,
                                    W=layer1W,
                                    b=layer1b)

        # the HiddenLayer being fully-connected, it operates on 2D matrices of
        # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
        # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
        # or (500, 50 * 4 * 4) = (500, 800) with the default values.
        if cuda_convnet:
            layer2_input = layer1.output.dimshuffle(3, 0, 1, 2).flatten(2)
        else:
            layer2_input = layer1.output.flatten(2)

        layer2 = HiddenLayer(rng,
                             input=layer2_input,
                             n_in=nkerns[1] * 3 * 3,
                             n_out=150,
                             W=layer2W,
                             b=layer2b,
                             activation=T.tanh)

        # classify the values of the fully-connected sigmoidal layer
        layer3 = LogisticRegression(input=layer2.output,
                                    n_in=150,
                                    n_out=10,
                                    W=layer3W,
                                    b=layer3b)

        # the cost we minimize during training is the NLL of the model
        cost = layer3.negative_log_likelihood(y)

        # create a function to compute the mistakes that are made by the model
        self.test_model = theano.function(
            [index],
            layer3.errors(y),
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size:(index + 1) * batch_size]
            })

        # create a function to compute probabilities of all output classes
        self.test_output_batch = theano.function(
            [index],
            layer3.p_y_given_x,
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size]
            })

        self.validate_model = theano.function(
            [index],
            layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]
            })

        # create a list of all model parameters to be fit by gradient descent
        self.params = layer3.params + layer2.params + layer1.params + layer0.params
        self.column_params = [
            nkerns, batch_size, normalized_width, distortion, cuda_convnet
        ]

        # create a list of gradients for all model parameters
        grads = T.grad(cost, self.params)

        # train_model is a function that updates the model parameters by
        # SGD Since this model has many parameters, it would be tedious to
        # manually create an update rule for each model parameter. We thus
        # create the updates list by automatically looping over all
        # (params[i], grads[i]) pairs.
        updates = [(param_i, param_i - (learning_rate) * grad_i)
                   for param_i, grad_i in zip(self.params, grads)]

        # Suggested by Alex Krizhevsky, found on:
        # http://yyue.blogspot.com/2015/01/a-brief-overview-of-deep-learning.html
        optimal_ratio = 0.001
        # should show what multiple current learning rate is of optimal learning rate
        grads_L1 = sum([abs(grad).sum() for grad in grads])
        params_L1 = sum([abs(param).sum() for param in self.params])
        update_ratio = (learning_rate /
                        (optimal_ratio)) * (grads_L1 / params_L1)

        self.train_model = theano.function(
            [index, learning_rate], [cost, update_ratio],
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            })
Example No. 12
def runDeepLearning():
    ### Loading training set and separting it into training set and testing set

    myDataset = Dataset()
    preprocess = 0
    datasets = myDataset.loadTrain(preprocess)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    dataset_test = myDataset.loadTest(preprocess)
    test_set_x, test_set_y, test_set_y_array = dataset_test[0]
    # temporary solution to get the ground truth of sample out to test_set_y_array.
    # the reason is that after T.cast, test_set_y becomes TensorVariable, which I do not find way to output its
    # value...anyone can help?

    ### Model parameters
    learning_rate = 0.02
    n_epochs = 3000
    nkerns = [
        30, 40, 40
    ]  # number of kernal at each layer, current best performance is 50.0% on testing set, kernal number is [30,40,40]
    batch_size = 500

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ishape = (48, 48)  # size of input images
    nClass = 7

    rng = np.random.RandomState(23455)
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size,28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, ishape[0], ishape[0]))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (48-5+1, 48-5+1) = (44, 44)
    # maxpooling reduces this further to (44/2, 44/2) = (22, 22)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 22, 22)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, ishape[0],
                                             ishape[0]),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (22-5+1, 22-5+1) = (18, 18)
    # maxpooling reduces this further to (18/2, 18/2) = (9, 9)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 9, 9)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 22, 22),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    layer2 = LeNetConvPoolLayer(rng,
                                input=layer1.output,
                                image_shape=(batch_size, nkerns[1], 9, 9),
                                filter_shape=(nkerns[2], nkerns[1], 2, 2),
                                poolsize=(2, 2))

    # the TanhLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (500, 40*4*4) = (500, 640)
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=nClass)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    test_model = theano.function(
        [index],
        layer4.errorsLabel(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many minibatches before checking the network
    # on the validation set; in this case we check every epoch

    best_params = None
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set

                    #test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_output = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_losses = [item[0] for item in test_output]
                    #test_y_gt = [label[0] for label in item[1] for item in test_output] #
                    # concatenate predictions and ground-truth labels over all test batches
                    test_y_pred = np.array(
                        [label for item in test_output for label in item[1]])
                    test_y_gt = np.array(
                        [label for item in test_output for label in item[2]])
                    #test_y_pred = np.array([item[1] for item in test_output] )
                    ## the predicted_labels for the input
                    ### it seems the batch size cannot be changed in theano.function while training the model ###
                    #test_label = reduce(lambda x,y: x+y,test_label)

                    #print test_y_pred
                    #print test_y_gt
                    #print test_set_y_array

                    errorNum = np.count_nonzero(test_y_gt - test_y_pred)
                    errorSampleIndex = [
                        i for i in range(len(test_y_pred))
                        if test_y_pred[i] != test_set_y_array[i]
                    ]
                    #print errorNum, len(errorSampleIndex)

                    test_score = np.mean(test_losses)
                    print(('  epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    print(('  error on all test samples %f %%') %
                          ((float(errorNum) / float(len(test_y_pred)) * 100.)))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')

    # TODO: write the code to save the trained model and test the trained model on test data
    # (a hedged sketch of such save/load helpers follows this function)

    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    # plot the first 100 test samples (errorSampleIndex above holds the indices of the misclassified ones)
    myDataset.plotSample(test_set_x.get_value(), test_set_y,
                         [i for i in range(0, 100)])
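
# A minimal sketch for the TODO above (an assumption, not part of the original code):
# persist and restore the values of the shared parameters collected in
# params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params.
# cPickle is assumed to be importable, as in the other examples in this file.
def save_trained_params(params, filename='trained_cnn_params.pkl'):
    """Dump the current values of all shared parameters to a pickle file."""
    values = [p.get_value(borrow=False) for p in params]
    with open(filename, 'wb') as f:
        cPickle.dump(values, f, protocol=cPickle.HIGHEST_PROTOCOL)


def load_trained_params(params, filename='trained_cnn_params.pkl'):
    """Restore shared parameter values previously saved by save_trained_params."""
    with open(filename, 'rb') as f:
        values = cPickle.load(f)
    for param, value in zip(params, values):
        param.set_value(value)
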
def Buildnet(params, nkerns=[20, 50], batch_size=500):

    rng = numpy.random.RandomState(23455)

    datasets = load_data(0)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ishape = (28, 28)  # this is the size of MNIST images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size,28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
    # maxpooling reduces this further to (24/2,24/2) = (12,12)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
    # maxpooling reduces this further to (8/2,8/2) = (4,4)
    # 4D output tensor is thus of shape (batch_size,nkerns[1],4,4)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1]*4*4) = (500, 800)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=3)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    f = theano.function(
        inputs=[index],
        outputs=[layer2.output, layer3.y_pred, y],
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    #    numepoch = len(params)
    layer3.W.set_value(params[-1][0])
    layer3.b.set_value(params[-1][1])
    layer2.W.set_value(params[-1][2])
    layer2.b.set_value(params[-1][3])
    layer1.W.set_value(params[-1][4])
    layer1.b.set_value(params[-1][5])
    layer0.W.set_value(params[-1][6])
    layer0.b.set_value(params[-1][7])

    outputvectors = numpy.zeros((10000, 500))
    labels = numpy.zeros((10000, 1))
    reallabels = numpy.zeros((10000, 1))

    for minibatch_index in xrange(n_test_batches):

        vector, label, reallabel = f(minibatch_index)

        outputvectors[minibatch_index * batch_size:(minibatch_index + 1) *
                      batch_size] = vector
        labels[minibatch_index * batch_size:(minibatch_index + 1) * batch_size,
               0] = label
        reallabels[minibatch_index * batch_size:(minibatch_index + 1) *
                   batch_size, 0] = reallabel

    return [outputvectors, labels, reallabels]
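
# Hedged usage sketch for Buildnet (an assumption, not part of the original source): it
# expects `params` to be the per-epoch parameter history, e.g. loaded from 'params.npy' as
# the display() example below suggests, with params[-1] ordered as
# [layer3.W, layer3.b, layer2.W, layer2.b, layer1.W, layer1.b, layer0.W, layer0.b].
if __name__ == '__main__':
    saved_params = numpy.load('params.npy')
    vectors, predicted, actual = Buildnet(saved_params, nkerns=[20, 50], batch_size=500)
    print 'hidden-layer feature vectors:', vectors.shape
    print 'test accuracy: %.2f %%' % (100.0 * numpy.mean(predicted.ravel() == actual.ravel()))
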
def perturb_bfgs(perturbation,
                 params,
                 shape,
                 oldoutput,
                 c=1,
                 nkerns=[20, 50],
                 batch_size=1):

    #print '... building the model'
    rng = numpy.random.RandomState(23455)

    x = T.tensor4()
    # Reshape matrix of rasterized images of shape (batch_size,28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
    # maxpooling reduces this further to (24/2,24/2) = (12,12)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
    # maxpooling reduces this further to (8/2,8/2) = (4,4)
    # 4D output tensor is thus of shape (batch_size,nkerns[1],4,4)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1]*4*4) = (1, 800)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=3)

    f = theano.function(inputs=[x], outputs=[layer2.output, layer3.y_pred])

    layer3.W.set_value(params[-1][0])
    layer3.b.set_value(params[-1][1])
    layer2.W.set_value(params[-1][2])
    layer2.b.set_value(params[-1][3])
    layer1.W.set_value(params[-1][4])
    layer1.b.set_value(params[-1][5])
    layer0.W.set_value(params[-1][6])
    layer0.b.set_value(params[-1][7])

    perturbed = shape
    oldoutputs = oldoutput
    distances = 0
    perturblength = numpy.sqrt(numpy.sum(perturbation**2))
    shapes = perturbed + perturbation
    outputs, labels = f(shapes.reshape(1, 1, 28, 28))
    print labels
    for o in oldoutputs:
        distances += numpy.sqrt(numpy.sum((outputs - o)**2))
    distances /= len(oldoutputs)
    return c * perturblength + distances
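
# Hedged sketch (an assumption about intended usage, not part of the original code):
# perturb_bfgs returns the scalar objective c*||perturbation|| + mean feature distance,
# so it can be handed to a generic optimizer such as scipy's L-BFGS-B with a numerically
# approximated gradient. `shape` is one flattened 28x28 image and `oldoutput` the list of
# reference hidden-layer vectors; float64 inputs are assumed to be acceptable to the
# compiled theano function, as in perturb_random below.
def optimize_perturbation(params, shape, oldoutput, c=1):
    from scipy.optimize import minimize
    x0 = numpy.zeros(28 * 28)  # start from the unperturbed image
    result = minimize(perturb_bfgs, x0,
                      args=(params, shape, oldoutput, c),
                      method='L-BFGS-B')
    return result.x  # the optimized perturbation vector
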
def perturb_random(params, shape, oldoutput, nkerns=[20, 50], batch_size=500):

    print '... building the model'
    rng = numpy.random.RandomState(23455)

    x = T.tensor4()
    # Reshape matrix of rasterized images of shape (batch_size,28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
    # maxpooling reduces this further to (24/2,24/2) = (12,12)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
    # maxpooling reduces this further to (8/2,8/2) = (4,4)
    # 4D output tensor is thus of shape (batch_size,nkerns[1],4,4)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1]*4*4) = (500, 800)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    f = theano.function(inputs=[x], outputs=[layer2.output, layer3.y_pred])

    layer3.W.set_value(params[-1][0])
    layer3.b.set_value(params[-1][1])
    layer2.W.set_value(params[-1][2])
    layer2.b.set_value(params[-1][3])
    layer1.W.set_value(params[-1][4])
    layer1.b.set_value(params[-1][5])
    layer0.W.set_value(params[-1][6])
    layer0.b.set_value(params[-1][7])

    #    perturb 500 shapes at each iteration, with ptimes iterations
    perturbed = numpy.tile(shape[0], (500, 1))
    oldoutputs = numpy.tile(oldoutput, (500, 1))
    label = shape[1]
    ptimes = 500
    imagelength = numpy.sqrt(numpy.sum(shape[0]**2))
    outputlength = numpy.sqrt(numpy.sum(oldoutput**2))
    p = []
    s = []
    for i in range(ptimes):
        print 'perturbing ' + str(i) + ' ......'
        perturbation = numpy.random.normal(0, 0.15, perturbed.shape)
        perturblength = numpy.sqrt(numpy.sum(perturbation**2, axis=1))
        shapes = perturbed + perturbation
        outputs, labels = f(shapes.reshape(500, 1, 28, 28))
        distances = numpy.sum((outputs - oldoutputs)**2, axis=1)
        pos = numpy.argmax(distances)
        print 'distance ' + str(numpy.sqrt(distances[pos]))
        pert = {}
        pert['perturbation'] = perturbation[pos]
        pert['plength'] = perturblength[pos]
        pert['ilength'] = imagelength
        pert['olength'] = outputlength
        pert['distance'] = numpy.sqrt(distances[pos])
        pert['output'] = outputs[pos]
        pert['label'] = labels[pos]
        p.append(pert)
        if len(numpy.nonzero(labels != label)[0]) != 0:
            print 'success!' + str(label) + ' '
            pos = numpy.nonzero(labels != label)[0][0]
            print labels[pos]
            pert = {}
            pert['perturbation'] = perturbation[pos]
            pert['plength'] = perturblength[pos]
            pert['ilength'] = imagelength
            pert['olength'] = outputlength
            pert['distance'] = numpy.sqrt(distances[pos])
            pert['output'] = outputs[pos]
            pert['label'] = labels[pos]
            s.append(pert)
    return p, s
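
# Hedged helper (an assumption, not part of the original code): from the `s` list returned
# by perturb_random, pick the successful adversarial perturbation with the smallest norm.
def smallest_successful_perturbation(successes):
    """Return the success dict with minimal perturbation length, or None if empty."""
    if not successes:
        return None
    return min(successes, key=lambda pert: pert['plength'])
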
Ejemplo n.º 16
0
    def __init__(self, n_basket,  n_hidden, n_vocabulary, n_embedding_dimension):
        # n_window groups several input records together (which may strengthen the sequence signal),
        # so that x is an (n_embedding_dimension * n_window)-dimensional vector.
        # self.embedding is the transformation matrix of the word vectors (broadly, the item property vectors).

        iscnn = False
        iscostplus = True
        nkerns = n_embedding_dimension  # it seems these cannot differ; keep them equal for now
        filter_shape = (nkerns, 1, n_basket, 1)
        # rng = np.random.RandomState(23455)
        rng = np.random.RandomState(23456)
        poolsize = (1, n_embedding_dimension)

        print "1. Neuron parameter construction ............",
        embedding = np.random.uniform(-0.5, 0.5, (n_vocabulary, n_embedding_dimension)).astype(theano.config.floatX)
        embedding[-1] = 0.
        self.embedding = theano.shared(
            value=embedding.astype(theano.config.floatX),
            name='embedding',
            borrow=True
        )

        # Using the last embedding row (index -1) as the padding entry is not ideal, but no better scheme comes to mind.
        #  X by u
        self.u = theano.shared(
            value=np.random.uniform(-0.5, 0.5, (nkerns, n_hidden)).astype(theano.config.floatX),
            # this dimension should match the dimensionality of the CNN output vector
            name='u',
            borrow=True
        )

        #  H by w
        self.w = theano.shared(
            value=np.random.uniform(-0.5, 0.5, (n_hidden, n_hidden)).astype(theano.config.floatX),
            name='w',
            borrow=True
        )

        self.hidden_lay0 = theano.shared(
            value=np.zeros(n_hidden, dtype=theano.config.floatX),
            name='hidden_lay0',
            borrow=True
        )

        fan_in = np.prod(filter_shape[1:])
        fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) // np.prod(poolsize))

        # initialize weights with random weights
        W_bound = np.sqrt(6. / (fan_in + fan_out))
        self.w_cnn = theano.shared(np.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                                              dtype=theano.config.floatX), borrow=True)
        b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)

        self.b_cnn = theano.shared(value=b_values, borrow=True)

        input_item_id = T.lmatrix('input_item_id')   # the input matrix of item ids
        input_size = T.lvector('input_size')
        neg_item_id = T.lmatrix('neg_item_id')
        x = self.embedding[input_item_id].reshape((input_item_id.shape[0], n_basket, n_embedding_dimension))
        x.name = 'x'
        # y = self.embedding[next_item_id].reshape((1, n_window * n_embedding_dimension))[0]
        neg = self.embedding[neg_item_id].reshape((neg_item_id.shape[0], n_basket, n_embedding_dimension))
        neg.name = 'neg'

        # Pass the embedded feature matrix through a CNN
        # (note: the convolution branch is only used when iscnn is True)
        if iscnn:
            cnn_x = LeNetConvPoolLayer(
                rng,
                input=x.reshape((x.shape[0], 1, n_basket, n_embedding_dimension)),
                image_shape=(None, 1, n_basket,  n_embedding_dimension),
                # image_shape has little effect here; the first dimension can simply be left as None
                filter_shape=filter_shape,
                W=self.w_cnn,
                b=self.b_cnn,
                poolsize=poolsize
            )
            cnn_x_output = cnn_x.output.flatten(2)
            self.param = (self.embedding, self.u, self.w, self.w_cnn, self.b_cnn)  # , self.v)
            self.name = ('embedding', 'u', 'w', 'w_cnn', 'b_cnn')
        else:
            def pooling_max(abasker_t, basket_size_t):
                pool_result_t = T.max(abasker_t[: basket_size_t], axis=0)
                return pool_result_t
            pool_result, _ = theano.scan(fn=pooling_max,
                                         sequences=[x.reshape((x.shape[0], n_basket, n_embedding_dimension)),
                                                    input_size])
            cnn_x = pool_result
            cnn_x_output = cnn_x.flatten(2)
            self.param = (self.embedding, self.u, self.w)
            self.name = ('embedding', 'u', 'w')

        print "done"

        print "2. Loss function construction ..............",

        def recurrence(x_t, h_tml):
            #  Defines the looping function
            h_t = T.nnet.sigmoid(T.dot(x_t, self.u) + T.dot(h_tml, self.w))
            return h_t

        h, _ = theano.scan(
            fn=recurrence,
            sequences=cnn_x_output,
            outputs_info=[self.hidden_lay0]
        )
        h.name = 'h'
        self.user_feature = h[-1, :]  # the final hidden state is used as the user representation
        self.user_feature.name = 'user_feature'

        #  Loss function
        if iscostplus:
            def cla_cost(x_t, h_t):
                s_tt = T.dot((x[x_t+1][:input_size[x_t+1]] - neg[x_t+1][:input_size[x_t+1]]), h_t)
                s_t = T.sum(T.log(1 + T.exp(-s_tt)))
                return s_t
            s, _ = theano.scan(
                fn=cla_cost,
                sequences=[T.arange(x.shape[0]-1), h]
            )
            cost = T.sum(s)
        else:
            cost_temp = T.dot(x[-1][:input_size[-1]], h[-2]) - T.dot(neg[-1][:input_size[-1]], h[-2])
            cost = T.sum(T.log(1 + T.exp(-cost_temp)))

        print "done"

        print "3. Random gradient descending update formula ......",
        learning_rate = T.dscalar('learning_rate')
        lamda = T.dscalar('lamda')
        gradient = T.grad(cost, self.param)
        updates = [(p, p - learning_rate * (g + p * lamda)) for p, g in zip(self.param, gradient)]
        print "done"

        print "4. Predictive function definition ..............",
        y_pred = T.argsort(T.dot(self.embedding, self.user_feature))  # a [-6:] slice would take the top 5 in ascending order
        self.predict = theano.function(inputs=[input_item_id, input_size], outputs=y_pred)
        print "done"

        print "5. Training function definition ..............",
        self.train = theano.function(inputs=[input_item_id, neg_item_id, input_size, learning_rate, lamda],
                                     outputs=cost,
                                     updates=updates)
        print "done"

        print "6. Evaluation function definition ..............",
        self.evaluation_recall_6 = theano.function(inputs=[input_item_id, input_size], outputs=y_pred)
        print "done"

        self.normalize = theano.function(inputs=[],
                                         updates={self.embedding:\
                                         self.embedding/T.sqrt((self.embedding**2).sum(axis=1)).dimshuffle(0, 'x')*10})
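
# Hedged usage sketch (the class name is not shown in this excerpt, so `BasketRNN` below is a
# placeholder, and the data shapes are assumptions based on the constructor above): each row of
# input_item_id is one basket of item ids padded to n_basket with the padding index (the last
# embedding row), input_size holds the true basket sizes, and neg_item_id holds negative samples
# of the same shape.
#
#     model = BasketRNN(n_basket=10, n_hidden=50, n_vocabulary=1000, n_embedding_dimension=50)
#     cost = model.train(input_item_id, neg_item_id, input_size, 0.01, 0.001)
#     ranking = model.predict(input_item_id, input_size)  # item ids sorted by ascending score
#     model.normalize()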
Ejemplo n.º 17
0
batch_size = 1

# allocate symbolic variables for the data
#index = T.lscalar()  # index to a [mini]batch
x = T.matrix('x')  # the data is presented as rasterized images

# Reshape matrix of rasterized images of shape (1, 50*50)
# to a 4D tensor, compatible with our LeNetConvPoolLayer
layer0_input = x.reshape((batch_size, 1, 50, 50))

# Construct the first convolutional pooling layer:
# filtering reduces the image size to (50-10+1,50-10+1)=(41,41)
# maxpooling reduces this further to (41/2,41/2) = (20,20)
# 4D output tensor is thus of shape (batch_size,nkerns[0],20,20)
layer0 = LeNetConvPoolLayer(rng, input=layer0_input, \
        image_shape=(batch_size, 1, 50, 50), \
        filter_shape=(nkerns[0], 1, 10, 10), poolsize=(2, 2) \
        )

# Construct the second convolutional pooling layer
# filtering reduces the image size to (20-5+1,20-5+1)=(16,16)
# maxpooling reduces this further to (16/2,16/2) = (8,8)
# 4D output tensor is thus of shape (batch_size,nkerns[1],8,8)
layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
        image_shape=(batch_size, nkerns[0], 20, 20),
        filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2) \
        )

# the HiddenLayer being fully-connected, it operates on 2D matrices of
# shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
# This will generate a matrix of shape (batch_size, nkerns[1]*8*8)
layer2_input = layer1.output.flatten(2)
Ejemplo n.º 18
0
def display(params, digit, epoch, mode = 'mat', size = (56, 56)):

    # epoch contains a list of epoch numbers to show;
    # for example, epoch = [0, 2, 4] shows epoch 0 (the initial state) and epochs 2 and 4
    # params from a finished CNN run can be passed directly, or loaded with numpy.load('params.npy')
    # digit is a single image from the data set, e.g. digit = train_set_x.get_value()[number]
    nkerns=[20, 50]
    rng = numpy.random.RandomState(23455)
    #show original digit
    if os.path.exists('digit') == 0:
        os.mkdir('digit')
    if mode == 'png':
        plt.figure(1)
        plt.gray()
        plt.axis('off')
        plt.imshow(digit.reshape(size))
        plt.savefig('digit/activity of layer0 (original digit).png')
        
    digit = digit.reshape(1, 1, size[0], size[1])
    
    inputdigit = T.tensor4()
    #building CNN with exactly the same parameters
    print '...building layer1'
    layer0_input = inputdigit
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
        image_shape=(1, 1, size[0], size[1]),
        filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2))
    
    
    print '...building layer2'
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
        image_shape=(1, nkerns[0], (size[0] - 4) / 2, (size[1] - 4) / 2),
        filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2))
    
    
    print '...building layer3'
    layer2_input = layer1.output.flatten(2)
    
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * (size[0] / 4 - 3) * (size[1] / 4 - 3),
                         n_out=500, activation=T.tanh)
    
    
    print '...building layer4'
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
    
    f = theano.function(inputs = [inputdigit], outputs = [layer0.conv_out, layer0.output, layer1.conv_out, layer1.output, layer2.output, layer3.p_y_given_x, layer3.y_pred])
    
    #export filters and activity in different epochs
    for num in epoch:
        
        print '...epoch ' + str(num)
        
        layer3.W.set_value(params[num][0])
        layer3.b.set_value(params[num][1])
        layer2.W.set_value(params[num][2])
        layer2.b.set_value(params[num][3])
        layer1.W.set_value(params[num][4])
        layer1.b.set_value(params[num][5])
        layer0.W.set_value(params[num][6])
        layer0.b.set_value(params[num][7])
        
        [conv0, output0, conv1, output1, output2, output3, y] = f(digit)
        
        if mode == 'png':
            plt.figure(2)
            plt.gray()
            for i in range(nkerns[0]):
                plt.subplot(4, 5, i + 1)
                plt.axis('off')
                plt.imshow(layer0.W.get_value()[i, 0])
            plt.savefig('digit/filter of layer1 in epoch ' + str(num) + '.png')
            
            plt.figure(3)
            plt.gray()
            for i in range(nkerns[1]):
                plt.subplot(5, 10, i + 1)
                plt.axis('off')
                plt.imshow(layer1.W.get_value()[i, 0])
            plt.savefig('digit/filter of layer2 in epoch ' + str(num) + '.png')
            
            plt.figure(4)
            plt.gray()
            plt.axis('off')
            plt.imshow(layer2.W.get_value())
            plt.savefig('digit/filter of layer3 in epoch ' + str(num) + '.png')
            
            plt.figure(5)
            plt.gray()
            plt.axis('off')
            plt.imshow(layer3.W.get_value())
            plt.savefig('digit/filter of layer4 in epoch ' + str(num) + '.png')
            
            plt.figure(6)
            plt.gray()
            for i in range(nkerns[0]):
                plt.subplot(4, 5, i + 1)
                plt.axis('off')
                plt.imshow(output0[0, i])
            plt.savefig('digit/activity of layer1 after downsampling in epoch ' + str(num) + '.png')
    
            plt.figure(7)
            plt.gray()
            plt.axis('off')
            for i in range(nkerns[1]):
                plt.subplot(5, 10, i + 1)
                plt.axis('off')
                plt.imshow(conv1[0, i])
            plt.savefig('digit/activity of layer2 before downsampling in epoch ' + str(num) + '.png')
    
            plt.figure(8)
            plt.gray()
            plt.axis('off')
            for i in range(nkerns[0]):
                plt.subplot(4, 5, i + 1)
                plt.axis('off')
                plt.imshow(conv0[0, i])
            plt.savefig('digit/activity of layer1 before downsampling in epoch ' + str(num) + '.png')
    
            plt.figure(9)
            plt.gray()
            for i in range(nkerns[1]):
                plt.subplot(5, 10, i + 1)
                plt.axis('off')
                plt.imshow(output1[0, i])
            plt.savefig('digit/activity of layer2 after downsampling in epoch ' + str(num) + '.png')
    
            plt.figure(10)
            plt.gray()
            plt.axis('off')
            plt.imshow(numpy.tile(output2, (10, 1)))
            plt.savefig('digit/activity of layer3 in epoch ' + str(num) + '.png')
    
            plt.figure(11)
            plt.gray()
            plt.axis('off')
            plt.imshow(numpy.tile(output3, (10, 1)))
            plt.savefig('digit/activity of layer4 in epoch ' + str(num) + '.png')

        if mode == 'mat':
            sio.savemat('digit in epoch ' + str(num) + '.mat', {'ActivityOfLayer0' : digit.reshape(size), 
            'ActivityOfLayer1before' : conv0[0],
            'ActivityOfLayer1after' : output0[0],
            'ActivityOfLayer2before' : conv1[0],
            'ActivityOfLayer2after' : output1[0],
            'ActivityOfLayer3' : output2,
            'ActivityOfLayer4' : output3,
            'FilterOfLayer1' : layer0.W.get_value()[:, 0, :, :],
            'FilterOfLayer2' : layer1.W.get_value()[:, 0, :, :],
            'FilterOfLayer3' : layer2.W.get_value(),
            'FilterOfLayer4' : layer3.W.get_value(),
            'y_predict' : y})

    return y
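
# Hedged usage sketch, following the comments at the top of display() (the file name and the
# sample index are assumptions):
#
#     params = numpy.load('params.npy')
#     digit = train_set_x.get_value()[0]
#     y_pred = display(params, digit, epoch=[0, 2, 4], mode='png', size=(56, 56))
#     print 'predicted label:', y_pred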
Ejemplo n.º 19
0
def evaluate_lenet5(learning_rate=0.05, n_epochs=10,
                    nkerns=[20, 50], batch_size=50):
    global train_dataset_route
    global valid_dataset_route
    global train_limit
    global valid_limit
    print train_dataset_route, type(train_dataset_route)
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data.load_spc_data(train_dataset_route, valid_dataset_route, train_limit, valid_limit)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ishape = (100, 100)  # this is the size of the input images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 100*100)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 100, 100))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (100-40+1,100-40+1)=(61,61)
    # maxpooling reduces this further to (61/2,61/2) = (30,30)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],30,30)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
            image_shape=(batch_size, 1, 100, 100),
            filter_shape=(nkerns[0], 1, 40, 40), poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (30-15+1,30-15+1)=(16,16)
    # maxpooling reduces this further to (16/2,16/2) = (8,8)
    # 4D output tensor is thus of shape (batch_size,nkerns[1],8,8)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, nkerns[0], 30, 30),
            filter_shape=(nkerns[1], nkerns[0], 15, 15), poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1]*8*8) = (50, 3200)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 8 * 8,
                         n_out=100, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=100, n_out=2)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    '''
    test_model = theano.function([index], layer3.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    test_results = theano.function(inputs=[index],
            outputs= layer3.y_pred,
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})
    '''
    validate_model = theano.function([index], layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function([index], cost, updates=updates,
          givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter , ' patience = ' , patience
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    layer0_state = layer0.__getstate__()
                    layer1_state = layer1.__getstate__()
                    layer2_state = layer2.__getstate__()
                    layer3_state = layer3.__getstate__()
                    trained_model_list = [layer0_state, layer1_state, layer2_state, layer3_state]
                    trained_model_array = numpy.asarray(trained_model_list)
                    classifier_file = open(train_model_route, 'wb')
                    # a small pickle header is written first; the prediction example below
                    # reads it back with cPickle.load before calling numpy.load
                    cPickle.dump([1,2,3], classifier_file, protocol=2)
                    numpy.save(classifier_file, trained_model_array)
                    classifier_file.close()

            if patience <= iter:
                done_looping = True
                print patience , iter
                break

    end_time = time.clock()
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
Ejemplo n.º 20
0
def create_cf_cnn(n_in, n_out, nkerns=[20, 50]):
    params_DB = shelve.open('params_cnn.dat')
    best_params = params_DB['params']
    params_DB.close()

    x = T.vector('x')

    rng = numpy.random.RandomState(1234)

    # Reshape matrix of rasterized images of shape (1, 30*50)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((1, 1, 30, 50))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (30-5+1,50-5+1)=(26,46)
    # maxpooling reduces this further to (26/2,46/2) = (13,23)
    # 4D output tensor is thus of shape (1,nkerns[0],13,23)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(1, 1, 30, 50),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (13-5+1,23-5+1)=(9,19)
    # maxpooling reduces this further to (9/2,19/2) = (4,9)
    # 4D output tensor is thus of shape (1,nkerns[1],4,9)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(1, nkerns[0], 13, 23),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (1, nkerns[1]*4*9) = (1, 1800)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 9,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=2)

    params = layer3.params + layer2.params + layer1.params + layer0.params

    #print best_params[0].get_value()

    # copy the saved parameter values into the freshly built layers; the nested try/except
    # picks an `inc` symbol (vector, matrix or 4D tensor) matching each parameter's rank
    for i in range(len(best_params)):
        try:
            inc = T.vector('inc')
            setvalue = theano.function([inc],
                                       params[i],
                                       updates=[(params[i], inc)])
            setvalue(best_params[i])
        except:
            try:
                inc = T.matrix('inc')
                setvalue = theano.function([inc],
                                           params[i],
                                           updates=[(params[i], inc)])
                setvalue(best_params[i])
            except:
                inc = T.tensor4('inc')
                setvalue = theano.function([inc],
                                           params[i],
                                           updates=[(params[i], inc)])
                setvalue(best_params[i])

    #print classifier.params[0].get_value()
    #print classifier.logRegressionLayer.W.get_value()

    vect = T.vector('vect')
    cf = theano.function(inputs=[vect],
                         outputs=layer3.p_y_given_x,
                         givens={x: vect})

    return cf
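
# Hedged usage sketch (an assumption, not original code): the returned `cf` maps one
# rasterized 30x50 image, given as a flat vector of 1500 floats, to class probabilities;
# `image_row` below is a hypothetical input vector.
#
#     cf = create_cf_cnn(n_in=1500, n_out=2)
#     probs = cf(numpy.asarray(image_row, dtype=theano.config.floatX))
#     print probs  # shape (1, 2): P(y=0) and P(y=1)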
Ejemplo n.º 21
0
def evaluate_lenet5(dataset_route=DataHome+"DogVsCat_test_feature_2500.csv", \
                    nkerns=[20, 50], batch_size=5):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    trained_model_pkl = open(ModelHome + train_model_route, 'rb')
    trained_model_state_list = cPickle.load(trained_model_pkl)
    trained_model_state_array = numpy.load(trained_model_pkl)
    layer0_state, layer1_state, layer2_state, layer3_state = trained_model_state_array

    test_set = tdtf.read_data_to_ndarray(dataset_route, limit=None, header_n=0)
    test_set_x, id_arr = test_set
    datasets = load_data.shared_dataset(test_set)
    test_set_x, test_set_y = datasets
    print test_set_x.shape, test_set_y.shape

    # compute number of minibatches for training, validation and testing
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ishape = (50, 50)  # this is the size of the input images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 50*50)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 50, 50))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (50-10+1,50-10+1)=(41,41)
    # maxpooling reduces this further to (41/2,41/2) = (20,20)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],20,20)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input, \
            image_shape=(batch_size, 1, 50, 50), \
            filter_shape=(nkerns[0], 1, 10, 10), poolsize=(2, 2), \
            W=layer0_state[0], b=layer0_state[1] \
            )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (20-5+1,20-5+1)=(16,16)
    # maxpooling reduces this further to (16/2,16/2) = (8,8)
    # 4D output tensor is thus of shape (batch_size,nkerns[1],8,8)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, nkerns[0], 20, 20),
            filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2), \
            W=layer1_state[0], b=layer1_state[1] \
            )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1]*8*8) = (5, 3200)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 8 * 8,
                         n_out=100, activation=T.tanh,\
                         W=layer2_state[0], b=layer2_state[1] \
                         )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=100, n_out=2, \
                                    W=layer3_state[0], b=layer3_state[1] \
                                )

    print "predicting"
    start_time = time.clock()
    # create a function to compute the mistakes that are made by the model
    test_results = theano.function(
        inputs=[index],
        outputs=layer3.y_pred,
        givens={x: test_set_x[index * batch_size:(index + 1) * batch_size]})

    test_res = [test_results(i) for i in xrange(n_test_batches)]
    print test_res

    id_l = []
    label_l = []
    index = 0
    for arr in test_res:
        for label in arr:
            label_l.append(label)
            id_l.append(id_arr[index])
            index += 1
    tdtf.wr_to_csv(header=['id', 'label'],
                   id_list=id_l,
                   pred_list=label_l,
                   filename=test_label_route)
    end_time = time.clock()
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Ejemplo n.º 22
0
def train(learning_rate=0.1, n_epochs=10, kernel_shapes = [7,5],
                    nkerns=[15,15], batch_size=1000, batch_type = 'fast',
                    mynet = 'best', representation='raw', momentum=0, history=4):

    # TODO: implement history of boards
    rng = numpy.random.RandomState(42)

    trainP = 0.998
    validP = 0.001
    testP  = 0.001    
    
    print "... Reading cached values ..."
    (trainCumLengths,validCumLengths,testCumLengths,filenames) = pickle.load(open("results/lengths.cache",'r'))
    
    print "... Getting filenames ..."
    datasetKGS = "../../go-data"
    datasetPro = "../../pro-GoGod"
    # use both datasets, test and valid set are only Pro games
#    fn1 = readGame.getFilenames(datasetKGS,1,0,1)[0]
#    random.shuffle(fn1)    
#    fn2 = readGame.getFilenames(datasetPro,1,0,1)[0]
    # NOTE: last 5% of professional games never used!
#    fn2 = fn2[:int(len(fn2)*0.95)]
#    random.shuffle(fn2)
#    filenames = fn2 #fn1 + fn2
    n = len(filenames)
    print "... Learning set contains " + str(n) + " games"
    
    print "... Computing cumulative game lengths ..."
    trainNames = filenames[:int(trainP*n)]
    validNames = filenames[int(trainP*n):int(trainP*n+validP*n)]
    testNames  = filenames[int(trainP*n+validP*n):int(trainP*n+validP*n+testP*n)]
    
#    random.shuffle(trainNames)
    
#    trainCumLengths = readGame.getCumGameLengths(trainNames)
#    validCumLengths = readGame.getCumGameLengths(validNames)
#    testCumLengths = readGame.getCumGameLengths(testNames)
    
#    fw = open("results/"lengths.cache","wb")
#    pickle.dump((trainCumLengths,validCumLengths,testCumLengths,filenames),fw)
#    fw.close()
    print "... Preprocessing initial batches ..."
    minn = batch_size / 80 +1
    temp = time.time()
    test_batch_x, test_batch_y = utils.shared_dataset(readGame.processSGFs(testNames[:minn],representation),batch_size=batch_size)
    train_batch_x, train_batch_y = utils.shared_dataset(readGame.processSGFs(trainNames[:minn],representation),batch_size=batch_size)
    valid_batch_x, valid_batch_y = utils.shared_dataset(readGame.processSGFs(validNames[:minn],representation),batch_size=batch_size)
    print "    average processing time per game: " + str((time.time()-temp)/18.0) + " seconds, per epoch: " + str(int((time.time()-temp)/18*n/60/60)) + " hours" 

    # compute number of minibatches for training, validation and testing
    n_train_batches = trainCumLengths[-1]
    n_valid_batches = validCumLengths[-1]
    n_test_batches =  testCumLengths[-1]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    iteration = T.lscalar()  # iteration number of a minibatch
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    gs = 19 # size of the go board
    ishape = (gs, gs)  # this is the size of the Go board input

    fw = open("results/"+mynet+"_"+str(learning_rate)+"_"+str(nkerns[0])+".res","w")
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... Building the model ...'
   
    nc = 2 if representation=='raw' else 6  # number of input planes: 2 for the raw representation, 6 otherwise
    nc *= 1+history
    
    if mynet == "default":
        # default is 7x7, regular 3 kernels
        layer0_input = x.reshape((batch_size, nc, gs, gs))
        layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                image_shape=(batch_size, nc, gs, gs),
                filter_shape=(nkerns[0], nc, 7, 7), poolsize=(1, 1))
        layer2_input = layer0.output.flatten(2)
        layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[0] * 13 * 13,
                           n_out=500, activation=T.tanh)
        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=361)
        cost = layer3.negative_log_likelihood(y)
 
#        prevGrads = [theano.shared(numpy.zeros((500,361),dtype=theano.config.floatX),borrow=True),
#                 theano.shared(numpy.zeros((361,),dtype=theano.config.floatX),borrow=True),
#                 theano.shared(numpy.zeros((nkerns[0] *13*13,500), dtype=theano.config.floatX),borrow=True),
#                 theano.shared(numpy.zeros((500,),dtype=theano.config.floatX),borrow=True),
#                 theano.shared(numpy.zeros((nkerns[0],nc,7,7),dtype=theano.config.floatX),borrow=True),
#                 theano.shared(numpy.zeros((nkerns[0],),dtype=theano.config.floatX),borrow=True),
#                ]
        params = layer3.params + layer2.params + layer0.params
    
   
    if mynet == "best":
        ks = kernel_shapes
        sp1= gs-ks[0]+1
        sp2= sp1-ks[1]+1
        layer0_input = x.reshape((batch_size, nc, gs, gs))
        layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                image_shape=(batch_size, nc, gs, gs),
                filter_shape=(nkerns[0], nc, ks[0], ks[0]), poolsize=(1, 1))
        layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                image_shape=(batch_size, nkerns[0], sp1, sp1),
                filter_shape=(nkerns[1], nkerns[0], ks[1], ks[1]), poolsize=(1, 1))
        layer3 = LogisticRegression(input=layer1.output.flatten(2), n_in=nkerns[1]*sp2*sp2, n_out=gs*gs)
        cost = layer3.negative_log_likelihood(y)
    
        prevGrads = [theano.shared(numpy.zeros((nkerns[1]*9*9,361),dtype=theano.config.floatX),borrow=True),
                 theano.shared(numpy.zeros((gs*gs,),dtype=theano.config.floatX),borrow=True),
                 theano.shared(numpy.zeros((nkerns[0],nkerns[1],ks[1],ks[1]), dtype=theano.config.floatX),borrow=True),
                 theano.shared(numpy.zeros((nkerns[1],),dtype=theano.config.floatX),borrow=True),
                 theano.shared(numpy.zeros((nkerns[0],nc,ks[0],ks[0]),dtype=theano.config.floatX),borrow=True),
                 theano.shared(numpy.zeros((nkerns[0],),dtype=theano.config.floatX),borrow=True),
                ]
        params = layer3.params + layer1.params + layer0.params

    if mynet == "padded":       # TODO: add zero padding test deeper architectures
        ks = kernel_shapes
        sp1= gs-ks[0]+1
        sp2= sp1-ks[1]+1
        layer0_input = x.reshape((batch_size, nc, gs, gs))
        layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                image_shape=(batch_size, nc, gs, gs),
                filter_shape=(nkerns[0], nc, ks[0], ks[0]), poolsize=(1, 1))
        layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                image_shape=(batch_size, nkerns[0], sp1, sp1),
                filter_shape=(nkerns[1], nkerns[0], ks[1], ks[1]), poolsize=(1, 1))
        layer3 = LogisticRegression(input=layer1.output.flatten(2), n_in=nkerns[1]*sp2*sp2, n_out=gs*gs)
        cost = layer3.negative_log_likelihood(y)
    
        params = layer3.params + layer1.params + layer0.params
        
    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([], layer3.errors(y),
             givens={
                x: test_batch_x,
                y: T.cast(test_batch_y, 'int32')})

    validate_model = theano.function([], layer3.errors(y),
             givens={
                x: valid_batch_x,
                y: T.cast(valid_batch_y, 'int32')})

    predictions = theano.function([], layer3.get_predictions(),
            givens={
                x: valid_batch_x})
                
    conditional_dist = theano.function([], layer3.get_conditional_dist(),
            givens={
                x: valid_batch_x})

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = []
    #adjusted_rate = learning_rate - iteration*(learning_rate/(float(n_epochs) * n_train_batches))
    # T.lt returns a symbolic variable, which is always truthy in a Python `if`, so use
    # T.switch to actually drop the learning rate after 3000*200 iterations
    adjusted_rate = T.switch(T.lt(iteration, 3000 * 200), learning_rate, 0.1 * learning_rate)
    
    for param_i, grad_i in zip(params, grads):#, prev_grad_i   , prevGrads):
        updates.append((param_i, param_i - adjusted_rate * grad_i))# - momentum * prev_grad_i))
    
    #for i,grad in enumerate(grads):
    #    updates.append((prevGrads[i], grad))
    
    train_model = theano.function([iteration], cost, updates=updates,
         givens={
            x: train_batch_x,
            y: T.cast(train_batch_y, 'int32')},on_unused_input='ignore')

    ###############
    # TRAIN MODEL #
    ###############
    print '... Training ...'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.999  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = 10000         # min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    stime = time.time()

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 1000 == 0:
                print 'training @ iter = ', iter
                with open("results/" + str(batch_size) + representation + str(history) + ".model", "wb") as model_file:
                    pickle.dump((updates, cost, layer0, layer1, layer3, test_model, predictions, conditional_dist), model_file)
            if iter == 5:
                print 'estimated train time per epoch = ' + str((time.time() - stime) * n_train_batches / 60.0 / iter / 60.0) + " hours"
            ax,ay = getBatch(trainNames, minibatch_index, trainCumLengths, batch_size,representation,batchType=batch_type,history=history)
            train_batch_x.set_value(ax)
            train_batch_y.set_value(ay)
            cost_ij = train_model(iter)

            if (iter + 1) % validation_frequency == 0 or iter==5:

                # compute zero-one loss on validation set
                validation_losses = []
                for i in xrange(n_valid_batches):
                    vx,vy = getBatch(validNames, i, validCumLengths, batch_size,representation,batchType='fast',history=history)
                    valid_batch_x.set_value(vx)
                    valid_batch_y.set_value(vy)
                    validation_losses.append(validate_model())
                this_validation_loss = numpy.mean(validation_losses)
        
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses=[]
                    for i in xrange(n_test_batches):
                        tx,ty = getBatch(testNames, i, testCumLengths, batch_size,representation,batchType='fast',history=history)
                        test_batch_x.set_value(tx)
                        test_batch_y.set_value(ty)
                        test_losses.append(test_model())
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

        fw.write("Epoch "+str(epoch) + ": " +str((1-this_validation_loss)*100.)+ "%\n")
        pickle.dump((updates,cost,layer0,layer1,layer3,test_model,predictions,conditional_dist),open("results/"+str(batch_size)+representation+str(history)+".model","w"))
        
            #if patience <= iter:
            #    done_looping = True
            #    break

    fw.close()
    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Example No. 23
# allocate symbolic variables for the data
index = T.lscalar()  # index to a [mini]batch
x = T.matrix('x')   # the data is presented as rasterized images
y = T.ivector('y')  # the labels are presented as 1D vector of
                    # [int] labels

# Reshape matrix of rasterized images of shape (1, 50*50)
# to a 4D tensor, compatible with our LeNetConvPoolLayer
layer0_input = x.reshape((batch_size, 1, layer0_input_img_size[0], layer0_input_img_size[1]))

# Construct the first convolutional pooling layer:
# filtering reduces each image side to (input side - filter side + 1),
# and 2x2 maxpooling halves it again
# 4D output tensor is thus of shape (batch_size, nkerns[0], pooled_h, pooled_w)
layer0 = LeNetConvPoolLayer(rng, input=layer0_input, \
        image_shape=(batch_size, 1, layer0_input_img_size[0], layer0_input_img_size[1]), \
        filter_shape=(nkerns[0], 1, filter0_shape[0], filter0_shape[1]), poolsize=(2, 2) \
        )

# Construct the second convolutional pooling layer
# filtering again reduces each side to (input side - filter side + 1),
# and 2x2 maxpooling halves it once more
# 4D output tensor is thus of shape (batch_size, nkerns[1], pooled_h, pooled_w)
layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
        image_shape=(batch_size, nkerns[0], layer1_input_img_size[0], layer1_input_img_size[1]),
        filter_shape=(nkerns[1], nkerns[0], filter1_shape[0], filter1_shape[1]), poolsize=(2, 2) \
        )

# the fully-connected hidden (tanh) layer operates on 2D matrices of
# shape (batch_size, num_pixels) (i.e. a matrix of rasterized images),
# so layer1's output is flattened to shape (batch_size, nkerns[1] * map_h * map_w)
layer2_input = layer1.output.flatten(2)
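
# The snippet above stops at layer2_input; a minimal sketch of how the
# remaining fully-connected and classification layers are typically attached
# (HiddenLayer / LogisticRegression are the classes used in the neighbouring
# examples; hidden_units, n_classes, map_h and map_w are hypothetical names
# for the hidden-layer size, the number of output classes, and layer1's
# output feature-map size):
layer2 = HiddenLayer(rng, input=layer2_input,
                     n_in=nkerns[1] * map_h * map_w,
                     n_out=hidden_units, activation=T.tanh)
layer3 = LogisticRegression(input=layer2.output, n_in=hidden_units, n_out=n_classes)
cost = layer3.negative_log_likelihood(y)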
Example No. 24
def load_trained_model():
    global if_load_trained_model
    global train_model_route 
    global layer0_input 
    global layer0 
    global layer1 
    global layer2_input 
    global layer2 
    global layer3 
    global test_results

    global layer0_input_img_size # ishape
    global filter0_shape
    global layer1_input_img_size
    global filter1_shape
    global layer2_input_img_size
    global layer2_out

    if_load_trained_model = True
    print "loading trained model for the first time"
    trained_model_pkl = open(train_model_route, 'rb')
    trained_model_state_list = cPickle.load(trained_model_pkl)
    trained_model_state_array = numpy.load(trained_model_pkl)
    layer0_state, layer1_state, layer2_state, layer3_state = trained_model_state_array

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... loading the model'

    # Reshape matrix of rasterized images of shape (1, 50*50)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, layer0_input_img_size[0], layer0_input_img_size[1]))

    # Construct the first convolutional pooling layer:
    # filtering reduces each image side to (input side - filter side + 1),
    # and 2x2 maxpooling halves it again
    # 4D output tensor is thus of shape (batch_size, nkerns[0], pooled_h, pooled_w)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input, \
            image_shape=(batch_size, 1, layer0_input_img_size[0], layer0_input_img_size[1]), \
            filter_shape=(nkerns[0], 1, filter0_shape[0], filter0_shape[1]), poolsize=(2, 2), \
                W=layer0_state[0], b=layer0_state[1] \
            )

    # Construct the second convolutional pooling layer
    # filtering again reduces each side to (input side - filter side + 1),
    # and 2x2 maxpooling halves it once more
    # 4D output tensor is thus of shape (batch_size, nkerns[1], pooled_h, pooled_w)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, nkerns[0], layer1_input_img_size[0], layer1_input_img_size[1]),
            filter_shape=(nkerns[1], nkerns[0], filter1_shape[0], filter1_shape[1]), poolsize=(2, 2), \
            W=layer1_state[0], b=layer1_state[1] \
            )

    # the fully-connected hidden (tanh) layer operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images),
    # so layer1's output is flattened to shape
    # (batch_size, nkerns[1] * layer2_input_img_size[0] * layer2_input_img_size[1])
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected hidden layer with tanh activation
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * layer2_input_img_size[0] * layer2_input_img_size[1],
                         n_out=layer2_out, activation=T.tanh, \
                         W=layer2_state[0], b=layer2_state[1] \
                         )

    # classify the values of the fully-connected hidden layer
    layer3 = LogisticRegression(input=layer2.output, n_in=layer2_out, n_out=N_OUT, \
                                    W=layer3_state[0], b=layer3_state[1] \
                                )

    test_results = theano.function(inputs=[x], \
        outputs= layer3.y_pred)
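
# Hypothetical usage sketch (not part of the original snippet): once
# load_trained_model() has been called, `test_results` maps a batch of
# rasterized images to predicted labels. A random batch stands in for real
# data here purely to illustrate the expected input shape.
def predict_random_batch():
    load_trained_model()
    fake_images = numpy.random.rand(
        batch_size,
        layer0_input_img_size[0] * layer0_input_img_size[1]).astype('float32')
    return test_results(fake_images)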
Example No. 25
######################
# BUILD ACTUAL MODEL #
######################
print '... building the model'

# Reshape matrix of rasterized images of shape (batch_size, 64 * 64)
# to a 4D tensor, compatible with our LeNetConvPoolLayer
layer0_input = x.reshape((batch_size, 1, 64,64))

# Construct the first convolutional pooling layer:
# filtering reduces the image size to (64-5+1, 64-5+1) = (60, 60)
# maxpooling reduces this further to (60/2, 60/2) = (30, 30)
# 4D output tensor is thus of shape (batch_size, nkerns[0], 30, 30)
layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=(batch_size, 1, 64, 64),
    filter_shape=(nkerns[0], 1, 5, 5),
    poolsize=(2, 2)
)

# Construct the second convolutional pooling layer
# filtering reduces the image size to (30-5+1, 30-5+1) = (26, 26)
# maxpooling reduces this further to (26/2, 26/2) = (13,13)
# 4D output tensor is thus of shape (batch_size, nkerns[1], 13, 13)
layer1 = LeNetConvPoolLayer(
    rng,
    input=layer0.output,
    image_shape=(batch_size, nkerns[0], 30, 30),
    filter_shape=(nkerns[1], nkerns[0], 5, 5),
    poolsize=(2, 2)
)
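
# Worked shape check for the two layers above (a small sanity sketch, not in
# the original snippet): the 64x64 input shrinks to (64-5+1)/2 = 30 per side
# after layer0 and to (30-5+1)/2 = 13 per side after layer1, so flattening
# layer1.output would give nkerns[1] * 13 * 13 features per image.
assert (64 - 5 + 1) // 2 == 30
assert (30 - 5 + 1) // 2 == 13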