Example #1
    def __init__(self, rng, X_data, y_data, batch_size, training_enabled,
                 layer_ndo_p, L2_reg, lrelu_alpha):

        layer0 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=X_data,  ## extreme augmented data input
            filter_shape=(320, 3, 2, 2),
            image_shape=(batch_size, 3, 126, 126),
            ssample=(1, 1),
            bordermode='valid',
            p=1.0,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer1 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer0.output,
            filter_shape=(320, 320, 2, 2),
            image_shape=(batch_size, 320, 125, 125),
            ssample=(1, 1),
            bordermode='valid',
            p=1.0,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer2 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer1.output,
            filter_shape=(320, 320, 2, 2),
            image_shape=(batch_size, 320, 124, 124),
            ssample=(2, 2),
            bordermode='valid',
            p=1.0,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer3 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer2.output,
            filter_shape=(640, 320, 2, 2),
            image_shape=(batch_size, 320, 62, 62),
            ssample=(1, 1),
            bordermode='valid',
            p=0.9,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer4 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer3.output,
            filter_shape=(640, 640, 2, 2),
            image_shape=(batch_size, 640, 61, 61),
            ssample=(1, 1),
            bordermode='valid',
            p=0.9,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer5 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer4.output,
            filter_shape=(640, 640, 2, 2),
            image_shape=(batch_size, 640, 60, 60),
            ssample=(2, 2),
            bordermode='valid',
            p=1.0,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer6 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer5.output,
            filter_shape=(960, 640, 2, 2),
            image_shape=(batch_size, 640, 30, 30),
            ssample=(1, 1),
            bordermode='valid',
            p=0.8,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer7 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer6.output,
            filter_shape=(960, 960, 2, 2),
            image_shape=(batch_size, 960, 29, 29),
            ssample=(1, 1),
            bordermode='valid',
            p=0.8,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer8 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer7.output,
            filter_shape=(960, 960, 2, 2),
            image_shape=(batch_size, 960, 28, 28),
            ssample=(2, 2),
            bordermode='valid',
            p=1.0,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer9 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer8.output,
            filter_shape=(1280, 960, 2, 2),
            image_shape=(batch_size, 960, 14, 14),
            ssample=(1, 1),
            bordermode='valid',
            p=0.7,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer10 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer9.output,
            filter_shape=(1280, 1280, 2, 2),
            image_shape=(batch_size, 1280, 13, 13),
            ssample=(1, 1),
            bordermode='valid',
            p=0.7,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer11 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer10.output,
            filter_shape=(1280, 1280, 2, 2),
            image_shape=(batch_size, 1280, 12, 12),
            ssample=(2, 2),
            bordermode='valid',
            p=1.0,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer12 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer11.output,
            filter_shape=(1600, 1280, 2, 2),
            image_shape=(batch_size, 1280, 6, 6),
            ssample=(1, 1),
            bordermode='valid',
            p=0.6,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer13 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer12.output,
            filter_shape=(1600, 1600, 2, 2),
            image_shape=(batch_size, 1600, 5, 5),
            ssample=(1, 1),
            bordermode='valid',
            p=0.6,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer14 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer13.output,
            filter_shape=(1600, 1600, 2, 2),
            image_shape=(batch_size, 1600, 4, 4),
            ssample=(2, 2),
            bordermode='valid',
            p=1.0,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer15 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer14.output,
            filter_shape=(1920, 1600, 2, 2),
            image_shape=(batch_size, 1600, 2, 2),
            ssample=(1, 1),
            bordermode='valid',
            p=0.5,
            alpha=lrelu_alpha  ##leaky relu
        )

        layer16 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer15.output,
            filter_shape=(1920, 1920, 1, 1),
            image_shape=(batch_size, 1920, 1, 1),
            ssample=(1, 1),
            bordermode='valid',
            p=0.5,
            alpha=0.5  ##leaky relu
        )

        layer17 = myConvLayer(
            rng,
            is_train=training_enabled,
            input_data=layer16.output,
            filter_shape=(10, 1920, 1, 1),
            image_shape=(batch_size, 1920, 1, 1),
            ssample=(1, 1),
            bordermode='valid',
            p=1.0,
            alpha=lrelu_alpha  ##leaky relu
        )

        softmax_input = layer17.output.flatten(2)
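        # layer17.output has shape (batch_size, 10, 1, 1); flatten(2)
        # collapses the trailing dimensions into the (batch_size, 10)
        # matrix expected by the softmax layer.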

        softmax_layer = SoftmaxWrapper(input_data=softmax_input,
                                       n_in=10,
                                       n_out=10)

        self.errors = softmax_layer.errors(y_data)

        L2_sqr = ((layer0.W**2).sum() + (layer1.W**2).sum() +
                  (layer2.W**2).sum() + (layer3.W**2).sum() +
                  (layer4.W**2).sum() + (layer5.W**2).sum() +
                  (layer6.W**2).sum() + (layer7.W**2).sum() +
                  (layer8.W**2).sum() + (layer9.W**2).sum() +
                  (layer10.W**2).sum() + (layer11.W**2).sum() +
                  (layer12.W**2).sum() + (layer13.W**2).sum() +
                  (layer14.W**2).sum() + (layer15.W**2).sum() +
                  (layer16.W**2).sum() + (layer17.W**2).sum())

        # the cost we minimize during training is the NLL of the model
        # plus the L2 weight penalty
        self.cost = (softmax_layer.negative_log_likelihood(y_data) +
                     L2_reg * L2_sqr)

        self.params = (layer17.params + layer16.params + layer15.params +
                       layer14.params + layer13.params + layer12.params +
                       layer11.params + layer10.params + layer9.params +
                       layer8.params + layer7.params + layer6.params +
                       layer5.params + layer4.params + layer3.params +
                       layer2.params + layer1.params + layer0.params)

        self.input = X_data
        self.y = y_data
def test_ModelC_AllCNN(learning_rate=0.05,
                       n_epochs=350,
                       batch_size=200,
                       L2_reg=0.001,
                       input_ndo_p=0.8,
                       layer_ndo_p=0.5,
                       save_model=True,
                       save_freq=50):
    """
    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer
    
    :type batch_size: int
    :param batch_size: the number of training examples per batch
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data2()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    print('n_train_batches: ', n_train_batches)
    print('n_valid_batches: ', n_valid_batches)
    print('n_test_batches: ', n_test_batches)

    learning_rate = numpy.asarray(learning_rate, dtype=numpy.float32)
    print('learning_rate: ', learning_rate)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    lr = T.fscalar()
    training_enabled = T.iscalar('training_enabled')

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # drop the input only while training, don't drop while testing
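    # (assuming drop() keeps each unit with probability input_ndo_p, scaling
    # by input_ndo_p at test time matches the expected training-time activation)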
    dropout_input = T.switch(T.neq(training_enabled, 0),
                             drop(layer0_input, p=input_ndo_p),
                             input_ndo_p * layer0_input)

    layer0 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=dropout_input,
                         filter_shape=(96, 3, 3, 3),
                         image_shape=(batch_size, 3, 32, 32),
                         ssample=(1, 1),
                         bordermode='half',
                         p=1.0)

    layer1 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=layer0.output,
                         filter_shape=(96, 96, 3, 3),
                         image_shape=(batch_size, 96, 32, 32),
                         ssample=(1, 1),
                         bordermode='half',
                         p=1.0)

    layer2 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=layer1.output,
                         filter_shape=(96, 96, 3, 3),
                         image_shape=(batch_size, 96, 32, 32),
                         ssample=(2, 2),
                         bordermode='half',
                         p=layer_ndo_p)

    layer3 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=layer2.output,
                         filter_shape=(192, 96, 3, 3),
                         image_shape=(batch_size, 96, 16, 16),
                         ssample=(1, 1),
                         bordermode='half',
                         p=1.0)

    layer4 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=layer3.output,
                         filter_shape=(192, 192, 3, 3),
                         image_shape=(batch_size, 192, 16, 16),
                         ssample=(1, 1),
                         bordermode='half',
                         p=1.0)

    layer5 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=layer4.output,
                         filter_shape=(192, 192, 3, 3),
                         image_shape=(batch_size, 192, 16, 16),
                         ssample=(2, 2),
                         bordermode='half',
                         p=layer_ndo_p)

    layer6 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=layer5.output,
                         filter_shape=(192, 192, 3, 3),
                         image_shape=(batch_size, 192, 8, 8),
                         ssample=(1, 1),
                         bordermode='half',
                         p=1.0)

    layer7 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=layer6.output,
                         filter_shape=(192, 192, 1, 1),
                         image_shape=(batch_size, 192, 8, 8),
                         ssample=(1, 1),
                         bordermode='half',
                         p=1.0)

    layer8 = myConvLayer(rng,
                         is_train=training_enabled,
                         input_data=layer7.output,
                         filter_shape=(10, 192, 1, 1),
                         image_shape=(batch_size, 192, 8, 8),
                         ssample=(1, 1),
                         bordermode='half',
                         p=1.0)

    # global average pooling: average each of the 10 class feature maps over
    # its 8x8 spatial grid, giving a (batch_size, 10) matrix
    global_average = layer8.output.mean(axis=(2, 3))

    softmax_layer = SoftmaxWrapper(input_data=global_average,
                                   n_in=10,
                                   n_out=10)

    L2_sqr = ((layer0.W**2).sum() + (layer1.W**2).sum() + (layer2.W**2).sum() +
              (layer3.W**2).sum() + (layer4.W**2).sum() + (layer5.W**2).sum() +
              (layer6.W**2).sum() + (layer7.W**2).sum() + (layer8.W**2).sum())

    # the cost we minimize during training is the NLL of the model
    # plus the L2 weight penalty
    cost = (softmax_layer.negative_log_likelihood(y) + L2_reg * L2_sqr)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        softmax_layer.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    validate_model = theano.function(
        [index],
        softmax_layer.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    # create a list of all model parameters to be fit by gradient descent
    params = (layer8.params + layer7.params + layer6.params + layer5.params +
              layer4.params + layer3.params + layer2.params + layer1.params +
              layer0.params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.

    momentum = theano.shared(numpy.cast[theano.config.floatX](0.9),
                             name='momentum')
    updates = []
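    # Each parameter gets a companion shared variable acting as its momentum
    # "velocity": the first update below steps the parameter along the old
    # velocity, and the second update folds the new gradient into the velocity.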
    for param in params:
        param_update = theano.shared(param.get_value() *
                                     numpy.cast[theano.config.floatX](0.))
        updates.append((param, param - lr * param_update))
        updates.append((param_update, momentum * param_update +
                        (numpy.cast[theano.config.floatX](1.) - momentum) *
                        T.grad(cost, param)))

    train_model = theano.function(
        [index, lr],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](1)
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters (disabled)
    # patience = 10000  # look at this many examples regardless
    # patience_increase = 2  # wait this much longer when a new best is found
    # improvement_threshold = 0.995  # a relative improvement of this much
    #                                # is considered significant
    # validation_frequency = min(n_train_batches, patience // 2)
    validation_frequency = n_train_batches // 2
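    # i.e. evaluate on the validation set twice per training epoch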

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    updateLRAfter = 200

    while (epoch < n_epochs) and (not done_looping):

        # (per-epoch shuffling of the training data would go here; it is not
        # implemented in this excerpt)

        epoch = epoch + 1
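        # learning-rate schedule: starting at epoch 201, divide the rate by 10,
        # then divide by 10 again every 50 epochs (epochs 251, 301, ...)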
        if (epoch > updateLRAfter):
            learning_rate *= 0.1
            updateLRAfter += 50
            print('epoch: ', epoch)
            print('updateLRAfter: ', updateLRAfter)
            print('learning_rate: ', learning_rate)

        for minibatch_index in range(n_train_batches):
            #print 'epoch: {0}, minibatch: {1}'.format(epoch, minibatch_index)

            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 50 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index, learning_rate)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    # if this_validation_loss < best_validation_loss * \
                    #         improvement_threshold:
                    #     patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))


            # if patience <= iter:
            #     done_looping = True
            #     break

        if save_model and epoch % save_freq == 0:
            # add model name to the file to differentiate different models
            with gzip.open('parameters_epoch_{0}.pklz'.format(epoch),
                           'wb') as fp:
                cPickle.dump([param.get_value() for param in params],
                             fp,
                             protocol=2)

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print(
        'Best validation score of %f %% obtained at iteration %i, '
        'with test performance %f %%' %
        (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
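The training script above calls a `drop` helper for the input dropout that is not defined in this excerpt. Below is only a minimal sketch of such a helper, assuming it keeps each unit with probability `p` using a shared Theano random stream; the actual implementation in the source may differ.

import theano
from theano.tensor.shared_randomstreams import RandomStreams

srng = RandomStreams(seed=12345)  # assumed module-level random stream


def drop(input_data, p=0.5):
    """Zero each unit of `input_data` with probability (1 - p)."""
    mask = srng.binomial(n=1, p=p, size=input_data.shape,
                         dtype=theano.config.floatX)
    return input_data * mask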
Example #3
    def __init__(self, rng, X_data, y_data, batch_size, training_enabled,
                 layer_ndo_p, L2_reg):
        # border mode 'half': conv2d pads filter_size // 2 zeros on every side,
        # so with ssample=(1, 1) the output keeps the input's spatial size
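        # e.g. a 3x3 filter with 'half' padding keeps a 32x32 feature map at
        # 32x32 when ssample=(1, 1), and reduces it to 16x16 when ssample=(2, 2)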
        layer0 = myConvLayer(rng,
                             is_train=training_enabled,
                             input_data=X_data,
                             filter_shape=(96, 3, 3, 3),
                             image_shape=(batch_size, 3, 32, 32),
                             ssample=(1, 1),
                             bordermode='half',
                             p=1.0)

        layer1 = myConvLayer(rng,
                             is_train=training_enabled,
                             input_data=layer0.output,
                             filter_shape=(96, 96, 3, 3),
                             image_shape=(batch_size, 96, 32, 32),
                             ssample=(1, 1),
                             bordermode='half',
                             p=1.0)

        layer2 = myConvLayer(rng,
                             is_train=training_enabled,
                             input_data=layer1.output,
                             filter_shape=(96, 96, 3, 3),
                             image_shape=(batch_size, 96, 32, 32),
                             ssample=(2, 2),
                             bordermode='half',
                             p=layer_ndo_p)

        layer3 = myConvLayer(rng,
                             is_train=training_enabled,
                             input_data=layer2.output,
                             filter_shape=(192, 96, 3, 3),
                             image_shape=(batch_size, 96, 16, 16),
                             ssample=(1, 1),
                             bordermode='half',
                             p=1.0)

        layer4 = myConvLayer(rng,
                             is_train=training_enabled,
                             input_data=layer3.output,
                             filter_shape=(192, 192, 3, 3),
                             image_shape=(batch_size, 192, 16, 16),
                             ssample=(1, 1),
                             bordermode='half',
                             p=1.0)

        layer5 = myConvLayer(rng,
                             is_train=training_enabled,
                             input_data=layer4.output,
                             filter_shape=(192, 192, 3, 3),
                             image_shape=(batch_size, 192, 16, 16),
                             ssample=(2, 2),
                             bordermode='half',
                             p=layer_ndo_p)

        layer6 = myConvLayer(rng,
                             is_train=training_enabled,
                             input_data=layer5.output,
                             filter_shape=(192, 192, 3, 3),
                             image_shape=(batch_size, 192, 8, 8),
                             ssample=(1, 1),
                             bordermode='half',
                             p=1.0)

        layer7 = myConvLayer(rng,
                             is_train=training_enabled,
                             input_data=layer6.output,
                             filter_shape=(192, 192, 1, 1),
                             image_shape=(batch_size, 192, 8, 8),
                             ssample=(1, 1),
                             bordermode='half',
                             p=1.0)

        layer8 = myConvLayer(rng,
                             is_train=training_enabled,
                             input_data=layer7.output,
                             filter_shape=(10, 192, 1, 1),
                             image_shape=(batch_size, 192, 8, 8),
                             ssample=(1, 1),
                             bordermode='half',
                             p=1.0)

        global_average = layer8.output.mean(axis=(2, 3))

        softmax_layer = SoftmaxWrapper(input_data=global_average,
                                       n_in=10,
                                       n_out=10)

        self.errors = softmax_layer.errors(y_data)

        L2_sqr = ((layer0.W**2).sum() + (layer1.W**2).sum() +
                  (layer2.W**2).sum() + (layer3.W**2).sum() +
                  (layer4.W**2).sum() + (layer5.W**2).sum() +
                  (layer6.W**2).sum() + (layer7.W**2).sum() +
                  (layer8.W**2).sum())

        # the cost we minimize during training is the NLL of the model
        # plus the L2 weight penalty
        self.cost = (softmax_layer.negative_log_likelihood(y_data) +
                     L2_reg * L2_sqr)

        self.params = (layer8.params + layer7.params + layer6.params +
                       layer5.params + layer4.params + layer3.params +
                       layer2.params + layer1.params + layer0.params)

        self.input = X_data
        self.y = y_data
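Both examples build on a `myConvLayer` block and a `SoftmaxWrapper` classifier whose definitions are not part of this excerpt. The sketch below only illustrates what such classes could look like in Theano, inferred from the call sites above; the weight initialization, the default leaky-ReLU slope, and the exact dropout mechanics are assumptions, not the original implementation.

import numpy
import theano
import theano.tensor as T
from theano.tensor.nnet import conv2d
from theano.tensor.shared_randomstreams import RandomStreams


class myConvLayer(object):
    """Hypothetical conv + leaky-ReLU + dropout layer matching the calls above."""

    def __init__(self, rng, input_data, filter_shape, image_shape, ssample,
                 bordermode, p, is_train, alpha=0.01):
        # Glorot-style uniform initialization (assumed)
        fan_in = numpy.prod(filter_shape[1:])
        fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])
        w_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(numpy.asarray(
            rng.uniform(low=-w_bound, high=w_bound, size=filter_shape),
            dtype=theano.config.floatX), borrow=True)
        self.b = theano.shared(numpy.zeros((filter_shape[0],),
                                           dtype=theano.config.floatX),
                               borrow=True)

        conv_out = conv2d(input=input_data, filters=self.W,
                          filter_shape=filter_shape, input_shape=image_shape,
                          border_mode=bordermode, subsample=ssample)
        # leaky ReLU: f(x) = x for x > 0, alpha * x otherwise
        activated = T.nnet.relu(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'),
                                alpha=alpha)

        # dropout: keep each unit with probability p while training,
        # scale by p at test time so the expected activation matches
        srng = RandomStreams(rng.randint(999999))
        mask = srng.binomial(n=1, p=p, size=activated.shape,
                             dtype=theano.config.floatX)
        self.output = T.switch(T.neq(is_train, 0), activated * mask,
                               p * activated)
        self.params = [self.W, self.b]


class SoftmaxWrapper(object):
    """Hypothetical softmax classifier exposing the methods used above."""

    def __init__(self, input_data, n_in, n_out):
        self.W = theano.shared(numpy.zeros((n_in, n_out),
                                           dtype=theano.config.floatX),
                               borrow=True)
        self.b = theano.shared(numpy.zeros((n_out,),
                                           dtype=theano.config.floatX),
                               borrow=True)
        self.p_y_given_x = T.nnet.softmax(T.dot(input_data, self.W) + self.b)
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        # mean cross-entropy of the correct class
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        # mean zero-one loss
        return T.mean(T.neq(self.y_pred, y))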