コード例 #1
0
ファイル: CNN.py プロジェクト: zhangyipin/MyDeepByTheano
    def __init__(self, rng, input, n_hidden_out, n_out, nkerns, batch_size):
        """Assemble two conv/pool layers, a tanh hidden layer and a
        logistic-regression output layer.

        :param rng: random number generator passed to the convolutional and
                    hidden layers
        :param input: symbolic input; reshaped to (batch_size, 1, 28, 28)
                      before entering the first convolutional layer
        :param n_hidden_out: number of outputs of the hidden layer (and number
                             of inputs of the logistic-regression layer)
        :param n_out: number of outputs of the logistic-regression layer
        :param nkerns: sequence whose first two entries are the kernel counts
                       of the first and second convolutional layers
        :param batch_size: number of examples per minibatch
        """
        pooling = (2, 2)

        # First conv/pool stage: single-channel 28x28 images, 5x5 filters.
        image_dims = (batch_size, 1, 28, 28)
        self.layer0 = conv0 = LeNetConvPoolLayer(
            rng,
            input=input.reshape(image_dims),
            image_shape=image_dims,
            filter_shape=(nkerns[0], 1, 5, 5),
            poolsize=pooling)

        # Second conv/pool stage: consumes the nkerns[0] maps of size 12x12
        # declared in image_shape.
        self.layer1 = conv1 = LeNetConvPoolLayer(
            rng,
            input=conv0.output,
            image_shape=(batch_size, nkerns[0], 12, 12),
            filter_shape=(nkerns[1], nkerns[0], 5, 5),
            poolsize=pooling)

        # Fully connected stage: the 4D feature maps are flattened to 2D.
        self.layer2 = hidden = HiddenLayer(
            rng,
            input=conv1.output.flatten(2),
            n_in=nkerns[1] * 4 * 4,
            n_out=n_hidden_out,
            activation=T.tanh)

        self.logRegressionLayer = classifier = LogisticRegression(
            input=hidden.output,
            n_in=n_hidden_out,
            n_out=n_out)

        # Expose the classifier's cost and error functions on the network.
        self.negative_log_likelihood = classifier.negative_log_likehood
        self.errors = classifier.errors

        # Parameters of every layer, from the first layer to the classifier.
        self.params = (conv0.params + conv1.params + hidden.params +
                       classifier.params)

        self.input = input
コード例 #2
0
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10):
        """Build a stack of sigmoid hidden layers, an RBM per hidden layer
        and a logistic-regression layer on top.

        :param numpy_rng: random number generator used for the hidden layers
                          and the RBMs; also seeds ``theano_rng`` when that
                          is not supplied
        :param theano_rng: random stream passed to the RBMs; when falsy a
                           ``RandomStreams`` seeded from ``numpy_rng`` is
                           created
        :param n_ins: size of the input of the first layer
        :param hidden_layers_sizes: sizes of the hidden layers; must contain
                                    at least one value
        :param n_outs: number of outputs of the logistic-regression layer
        """
        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        self.x = T.matrix('x')
        self.y = T.ivector('y')

        # The first layer reads the network input; every later layer reads
        # the output of the layer added just before it.
        input_size = n_ins
        for layer_size in hidden_layers_sizes:
            if self.sigmoid_layers:
                layer_input = self.sigmoid_layers[-1].output
            else:
                layer_input = self.x

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=layer_size,
                                        activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # Only the sigmoid layers contribute to self.params inside the
            # loop; the RBM objects are kept separately in self.rbm_layers.
            self.params.extend(sigmoid_layer.params)

            # The RBM is handed the sigmoid layer's W and b.
            self.rbm_layers.append(
                RBM(numpy_rng=numpy_rng,
                    theano_rng=theano_rng,
                    input=layer_input,
                    n_visible=input_size,
                    n_hidden=layer_size,
                    W=sigmoid_layer.W,
                    hbias=sigmoid_layer.b))

            input_size = layer_size

        # Classifier on top of the last hidden layer.
        top_layer = self.sigmoid_layers[-1]
        self.logLayer = LogisticRegression(
            input=top_layer.output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)

        self.params.extend(self.logLayer.params)

        # Symbolic fine-tuning cost and error expressions for labels self.y.
        self.finetune_cost = self.logLayer.negative_log_likehood(self.y)

        self.errors = self.logLayer.errors(self.y)
コード例 #3
0
    def __init__(self, np_rng, theano_rng=None, n_ins=784, hidden_layer_sizes=[500, 500], n_outs=10):
        """Build a stack of sigmoid hidden layers, an AutoEncoder per hidden
        layer and a logistic-regression layer on top.

        :param np_rng: random number generator used for the hidden layers and
                       the autoencoders; also seeds ``theano_rng`` when that
                       is not supplied
        :param theano_rng: random stream passed to the autoencoders; when
                           falsy a ``RandomStreams`` seeded from ``np_rng`` is
                           created
        :param n_ins: size of the input of the first layer
        :param hidden_layer_sizes: sizes of the hidden layers; must contain at
                                   least one value
        :param n_outs: number of outputs of the logistic-regression layer
        """
        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layer_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(np_rng.randint(2 ** 30))

        self.x = T.matrix('x')
        self.y = T.ivector('y')

        for depth in xrange(self.n_layers):
            n_out = hidden_layer_sizes[depth]
            if depth:
                # Deeper layers are fed by the layer directly below them.
                n_in = hidden_layer_sizes[depth - 1]
                layer_input = self.sigmoid_layers[-1].output
            else:
                # The bottom layer reads the network input.
                n_in, layer_input = n_ins, self.x

            hidden = HiddenLayer(np_rng, layer_input, n_in, n_out, activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(hidden)

            self.params.extend(hidden.params)

            # The autoencoder is handed the hidden layer's W and b.
            self.dA_layers.append(
                AutoEncoder(np_rng, n_in, n_out, theano_rng=theano_rng, input=layer_input,
                            W=hidden.W, b_hid=hidden.b))

        # Classifier on top of the last hidden layer.
        top_output = self.sigmoid_layers[-1].output
        self.log_layer = LogisticRegression(top_output, self.y, hidden_layer_sizes[-1], n_outs)
        self.params.extend(self.log_layer.params)

        self.finetune_cost = self.log_layer.negative_log_likelihood()
        self.errors = self.log_layer.errors()
コード例 #4
0
ファイル: CNN.py プロジェクト: arasharchor/deepmodel
def evaluate_lenet5(learning_rate=0.1,
                    n_epochs=200,
                    dataset='emotion',
                    nkerns=[20, 50],
                    batch_size=500):
    """Train a LeNet-style convolutional network with minibatch SGD and
    early stopping, reporting validation and test error along the way.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: dataset name handed to ``Ld.load_share``; must be
                    ``'mnist'`` (28x28 images, 10 labels) or ``'emotion'``
                    (48x48 images, 7 labels)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :raises ValueError: if ``dataset`` is not ``'mnist'`` or ``'emotion'``
    """

    rng = numpy.random.RandomState(23455)

    datasets = Ld.load_share(dataset)
    if dataset == 'mnist':
        ishape = (28, 28)  # size of the MNIST images
        num_label = 10
    elif dataset == 'emotion':
        ishape = (48, 48)  # size of the emotion images
        num_label = 7
    else:
        # Without this branch an unknown name only failed later with a
        # NameError on ``ishape``.
        raise ValueError(
            "unsupported dataset %r: expected 'mnist' or 'emotion'" %
            (dataset,))

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Number of whole minibatches for training, validation and testing.
    # Floor division keeps the counts integral on every Python version.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of int labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape the matrix of rasterized images to a 4D tensor compatible with
    # LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 1, ishape[0], ishape[1]))

    # First convolutional pooling layer (5x5 filters, 2x2 pooling).
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, ishape[0],
                                             ishape[1]),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Remaining convolutional layers.  The image_shape values below follow
    # from filtering without padding followed by 2x2 pooling:
    #   emotion: 48 -> 44 -> 22, 22 -> 20 -> 10, 10 -> 8 -> 4
    #   mnist:   28 -> 24 -> 12, 12 -> 8 -> 4
    # so both variants end with nkerns[1] feature maps of size 4x4.
    if dataset == 'emotion':
        layer05 = LeNetConvPoolLayer(rng,
                                     input=layer0.output,
                                     image_shape=(batch_size, nkerns[0], 22,
                                                  22),
                                     filter_shape=(nkerns[0], nkerns[0], 3, 3),
                                     poolsize=(2, 2))
        layer1 = LeNetConvPoolLayer(rng,
                                    input=layer05.output,
                                    image_shape=(batch_size, nkerns[0], 10,
                                                 10),
                                    filter_shape=(nkerns[1], nkerns[0], 3, 3),
                                    poolsize=(2, 2))
        conv_layers = [layer0, layer05, layer1]
    else:  # dataset == 'mnist'
        layer1 = LeNetConvPoolLayer(rng,
                                    input=layer0.output,
                                    image_shape=(batch_size, nkerns[0], 12,
                                                 12),
                                    filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                    poolsize=(2, 2))
        conv_layers = [layer0, layer1]

    # The hidden layer is fully connected, so it operates on 2D matrices of
    # shape (batch_size, nkerns[1] * 4 * 4).
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected tanh layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=num_label)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # functions computing the mistakes made by the model on one minibatch
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # All model parameters to be fit by gradient descent, top layer first.
    # Every convolutional layer is included; the intermediate 'emotion' layer
    # used to be left out and therefore stayed at its random initialization.
    params = layer3.params + layer2.params
    for conv_layer in reversed(conv_layers):
        params = params + conv_layer.params

    # gradients of the cost with respect to every parameter
    grads = T.grad(cost, params)

    # plain SGD: one update rule per (parameter, gradient) pair
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    # Number of minibatches between two validation passes; with the values
    # above this is at most once per epoch.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            # global minibatch counter across epochs
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter =  %i' % iteration)
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(
                        ('     epoch %i, minibatch %i/%i, test error of best '
                         'model %f %%') % (epoch, minibatch_index + 1,
                                           n_train_batches, test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
コード例 #5
0
def evaluate_lenet5(learning_rate=0.15, n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 20], batch_size=500):
    """Train a LeNet-style convolutional network on the CIFAR-10 batches.

    The five ``cifar-10-batches-py/data_batch_*`` files are concatenated; the
    first 7,500 rows become the validation set and rows 7,500-49,999 the
    training set.  All three splits are standardized with the mean and
    standard deviation of the training rows.  Training uses minibatch SGD
    with an L2 penalty and early stopping, logs cost / validation error to
    ``epoc_cost.csv`` and ``epoc_val_error.csv`` and finally plots the
    per-epoch averages.

    :type learning_rate: float
    :param learning_rate: initial learning rate (factor for the stochastic
                          gradient); it is set to 0.1 when epoch 10 starts

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: unused; kept so existing callers keep working (the
                    CIFAR-10 paths are hard-coded)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """

    rng = numpy.random.RandomState(23455)

    def shared_dataset(data_xy, borrow=True):
        """Load an ``(x, y)`` pair into Theano shared variables.

        Both arrays are stored as ``floatX`` shared variables; the labels are
        returned through a cast to ``int32`` because they are used as
        integer labels in the computations.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')

    def as_table(rows):
        """Return a list of 3-element log rows as an (n_rows, 3) array."""
        # The shape is passed positionally: the ``newshape`` keyword is
        # deprecated in recent NumPy releases.
        return numpy.reshape(rows, (len(rows), 3))

    train_batches = [
        unpickle('cifar-10-batches-py/data_batch_%d' % batch_no)
        for batch_no in range(1, 6)
    ]
    test = unpickle('cifar-10-batches-py/test_batch')

    X_train = numpy.concatenate([batch["data"] for batch in train_batches],
                                axis=0)
    y_train = numpy.concatenate([batch["labels"] for batch in train_batches])

    # Flatten the test images using the test set's own row count (the
    # training batch's row count was used here before).
    test_data = test["data"]
    Xte_rows = test_data.reshape(test_data.shape[0], 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    Xval_rows = X_train[:7500, :]  # first 7,500 rows for validation
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]  # rows 7,500-49,999 for training
    Ytr = y_train[7500:50000]

    # Standardize every split with statistics of the TRAINING rows only;
    # the standard deviation used to be taken from the test rows.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)
    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    # A shared variable lets the learning rate be changed during training
    # without recompiling train_model.
    learning_rate = theano.shared(learning_rate)

    test_set_x, test_set_y = shared_dataset((Xte_rows, Yte))
    valid_set_x, valid_set_y = shared_dataset((Xval_rows, Yval))
    train_set_x, train_set_y = shared_dataset((Xtr_rows, Ytr))

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape the matrix of rasterized images of shape
    # (batch_size, 3 * 32 * 32) to a 4D tensor compatible with
    # LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # First convolutional pooling layer.  The image_shape given to the next
    # layer (16x16) implies the filtering keeps the 32x32 size and the 2x2
    # pooling halves it -- NOTE(review): confirm LeNetConvPoolLayer pads.
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(2, 2)
    )

    # Second convolutional pooling layer: 16x16 maps in; the hidden layer
    # below expects nkerns[1] maps of size 8x8 out.
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 16, 16),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # The HiddenLayer is fully connected, so it operates on 2D matrices of
    # shape (batch_size, nkerns[1] * 8 * 8).
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected relu layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 8 * 8,
        n_out=500,
        activation=relu
    )

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # The cost we minimize during training is the NLL of the model plus an
    # L2 penalty on the weights of the two fully-connected layers.
    L2_reg = 0.001
    L2_sqr = (layer2.W ** 2).sum() + (layer3.W ** 2).sum()

    cost = layer3.negative_log_likelihood(y) + L2_reg * L2_sqr

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # plain SGD: one update rule per (parameter, gradient) pair
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # Number of minibatches between two validation passes; with the values
    # above this is at most once per epoch.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # Log rows: [iteration, epoch, value].
    epoch_loss_list = []
    epoch_val_list = []

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        if epoch == 10:
            learning_rate.set_value(0.1)
        if epoch > 3:
            # Refresh the on-disk logs at the start of every later epoch so
            # progress can be inspected while training is running.
            numpy.savetxt(fname='epoc_cost.csv', X=as_table(epoch_loss_list),
                          fmt='%1.3f')
            numpy.savetxt(fname='epoc_val_error.csv',
                          X=as_table(epoch_val_list),
                          fmt='%1.3f')

        for minibatch_index in range(n_train_batches):

            # global minibatch counter across epochs
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            cost_ij = train_model(minibatch_index)

            epoch_loss_list.append([iteration, epoch, float(cost_ij)])

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                epoch_val_list.append(
                    [iteration, epoch, this_validation_loss])

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)

    # Average the logged cost and validation error per epoch and plot them.
    epoch_loss_np = as_table(epoch_loss_list)
    epoch_val_np = as_table(epoch_val_list)

    epoch_loss = pandas.DataFrame({"iter": epoch_loss_np[:, 0], "epoch": epoch_loss_np[:, 1],
                                   "cost": epoch_loss_np[:, 2]})
    epoch_vall = pandas.DataFrame({"iter": epoch_val_np[:, 0], "epoch": epoch_val_np[:, 1],
                                   "val_error": epoch_val_np[:, 2]})
    epoc_avg_loss = pandas.DataFrame(epoch_loss.groupby(['epoch']).mean()["cost"])
    epoc_avg_val = pandas.DataFrame(epoch_vall.groupby(['epoch']).mean()["val_error"])
    epoc_avg_loss = pandas.DataFrame({"epoch": epoc_avg_loss.index.values, "cost": epoc_avg_loss["cost"]})
    epoc_avg_loss_val = pandas.DataFrame({"epoch": epoc_avg_val.index.values, "val_error": epoc_avg_val["val_error"]})
    epoc_avg_loss.plot(kind="line", x="epoch", y="cost")
    plt.show()
    epoc_avg_loss_val.plot(kind='line', x="epoch", y="val_error")
    plt.show()
コード例 #6
0
    def __init__(self, n_inp=784, n_out=10, hidden_layer_sizes=[500, 500]):
        """ This class is made to support a variable number of layers.

        :param n_inp: dimension of the input to the DBN; either an int (flat
        input vectors of that length) or a ``(height, width, channel)``
        sequence (image input, flattened before the first layer)
        :param n_out: int, dimension of the output of the network
        :param hidden_layer_sizes: list of ints, intermediate layers size, must contain
        at least one value
        """

        self.sigmoid_layers = []
        self.layers = []
        self.params = []
        self.n_layers = len(hidden_layer_sizes)

        assert self.n_layers > 0

        # Define the graph inputs.  The default ``n_inp=784`` is a plain int,
        # which cannot be unpacked into three values; treat such a value as
        # the length of an already-flat input vector.
        try:
            height, width, channel = n_inp
        except TypeError:
            n_visible = n_inp
            x_shape = [None, n_visible]
        else:
            n_visible = height * width * channel
            x_shape = [None, height, width, channel]
        self.x = tf.placeholder(tf.float32, x_shape)
        self.y = tf.placeholder(tf.float32, [None, n_out])

        for i, n_hidden in enumerate(hidden_layer_sizes):
            if i == 0:
                # The first layer reads the flattened network input and is
                # paired with a GRBM.
                input_size = n_visible
                layer_input = tf.reshape(self.x, [-1, n_visible])
                rbm_class = GRBM
            else:
                # Later layers read the activation of the hidden layer below
                # and are paired with an RBM.
                input_size = hidden_layer_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output
                rbm_class = RBM

            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_inp=input_size,
                                        n_out=n_hidden,
                                        activation=tf.nn.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # Only the parameters of the sigmoid layers are declared as
            # parameters of the DBN; the RBM objects are kept in self.layers.
            self.params.extend(sigmoid_layer.params)

            # The RBM is handed the sigmoid layer's W and b.
            rbm_layer = rbm_class(inp=layer_input,
                                  n_visible=input_size,
                                  n_hidden=n_hidden,
                                  W=sigmoid_layer.W,
                                  hbias=sigmoid_layer.b)
            self.layers.append(rbm_layer)

        # Classifier on top of the last hidden layer.
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_inp=hidden_layer_sizes[-1],
            n_out=n_out)
        self.params.extend(self.logLayer.params)

        # Cost for the second (fine-tuning) phase of training, defined as the
        # cost of the logistic regression output layer.
        self.finetune_cost = self.logLayer.cost(self.y)

        # Prediction and accuracy expressions of the output layer for the
        # minibatch given by self.x and self.y.
        self.pred = self.logLayer.pred
        self.accuracy = self.logLayer.accuracy(self.y)
        """
コード例 #7
0
ファイル: CNN.py プロジェクト: guopengyu0201/ai-project-2016
def test_CNN(learning_rate=0.01, n_epochs=1000, batch_size=20, n_hidden=500):
    """Train a two-stage LeNet-style CNN with minibatch SGD and early stopping.

    Builds two conv/pool layers, a tanh hidden layer and a logistic-regression
    output layer on top of the data returned by ``load_data()``, trains them
    with plain SGD and prints the validation error (and, whenever it improves,
    the test error) as training proceeds.

    :type learning_rate: float
    :param learning_rate: step size of the SGD parameter updates

    :type n_epochs: int
    :param n_epochs: maximum number of passes over the training set

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type n_hidden: int
    :param n_hidden: currently unused; the hidden layer width is fixed at 500
    """
    dataset = load_data()
    # NOTE(review): the original author recorded train_set_x as (50000, 784);
    # confirm against load_data.
    train_set_x, train_set_y = dataset[0]
    valid_set_x, valid_set_y = dataset[1]
    test_set_x, test_set_y = dataset[2]

    # Floor division: these counts drive xrange() and the modulo test below,
    # so they must stay integers even under true-division semantics.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print('training set has %i batches' % n_train_batches)
    print('validate set has %i batches' % n_valid_batches)
    print('testing set has %i batches' % n_test_batches)

    # symbolic variables
    x = T.matrix()
    y = T.ivector()  # lvector: [long int] labels; ivector: [int] labels
    minibatch_index = T.lscalar()

    print('build the model...')
    rng = numpy.random.RandomState(23455)

    # Reshape x from (batch_size, 28*28) to (batch_size, features, 28, 28).
    N_filters_0 = 20
    D_features_0 = 1
    layer0_input = x.reshape((batch_size, D_features_0, 28, 28))
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                filter_shape=(N_filters_0, D_features_0, 5, 5),
                                image_shape=(batch_size, D_features_0, 28, 28))
    # Expected layer0.output: (batch_size, N_filters_0, 12, 12), i.e.
    # (28-5+1)/2 per side -- assumes the layer's default pooling is 2x2.

    N_filters_1 = 50
    D_features_1 = N_filters_0
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                filter_shape=(N_filters_1, D_features_1, 5, 5),
                                image_shape=(batch_size, N_filters_0, 12, 12))
    # Expected layer1.output: (batch_size, 50, 4, 4) under the same assumption.

    # Flatten every example to one row: (batch, 50, 4, 4) -> (batch, 50*4*4).
    layer2_input = layer1.output.flatten(2)
    layer2 = HiddenLayer(rng,
                         layer2_input,
                         n_in=50 * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    cost = layer3.negative_log_likelihood(y)

    # Evaluation functions: each takes a minibatch number and substitutes the
    # matching slice of the shared dataset for x and y.
    test_model = theano.function(
        inputs=[minibatch_index],
        outputs=layer3.errors(y),
        givens={
            x:
            test_set_x[minibatch_index * batch_size:(minibatch_index + 1) *
                       batch_size],
            y:
            test_set_y[minibatch_index * batch_size:(minibatch_index + 1) *
                       batch_size]
        })

    valid_model = theano.function(
        inputs=[minibatch_index],
        outputs=layer3.errors(y),
        givens={
            x:
            valid_set_x[minibatch_index * batch_size:(minibatch_index + 1) *
                        batch_size],
            y:
            valid_set_y[minibatch_index * batch_size:(minibatch_index + 1) *
                        batch_size]
        })

    # One SGD update per parameter: p <- p - learning_rate * dcost/dp.
    params = layer3.params + layer2.params + layer1.params + layer0.params
    gparams = T.grad(cost, params)
    updates = [(par, par - learning_rate * gpar)
               for par, gpar in zip(params, gparams)]

    train_model = theano.function(
        inputs=[minibatch_index],
        outputs=[cost],
        updates=updates,
        givens={
            x:
            train_set_x[minibatch_index * batch_size:(minibatch_index + 1) *
                        batch_size],
            y:
            train_set_y[minibatch_index * batch_size:(minibatch_index + 1) *
                        batch_size]
        })

    #---------------------Train-----------------------#
    print('training...')

    epoch = 0
    patience = 10000
    patience_increase = 2
    validation_frequency = min(n_train_batches, patience // 2)
    improvement_threshold = 0.995

    min_validation_error = numpy.inf
    # Bound up front so the final summary cannot hit a NameError when the
    # loop ends before any validation pass has run (e.g. n_epochs=0).
    best_iter = 0
    test_error = 0.
    done_looping = False

    start_time = time.clock()
    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        for batch_idx in xrange(n_train_batches):
            train_model(batch_idx)
            iteration = (epoch - 1) * n_train_batches + batch_idx

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [
                    valid_model(i) for i in xrange(n_valid_batches)
                ]
                validation_error = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, batch_idx + 1, n_train_batches,
                       validation_error * 100.))

                if validation_error < min_validation_error:
                    # Only a sufficiently large improvement extends patience.
                    if validation_error < min_validation_error * improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)
                    min_validation_error = validation_error
                    best_iter = iteration

                    # Score the new best model on the test set.
                    test_error = numpy.mean(
                        [test_model(idx) for idx in xrange(n_test_batches)])
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, batch_idx + 1, n_train_batches,
                           test_error * 100.))

            if iteration >= patience:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (min_validation_error * 100., best_iter + 1, test_error * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
コード例 #8
0
    def __init__(self,
                 np_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10):
        """Stack sigmoid layers with weight-sharing RBMs and a logistic top.

        One ``HiddenLayer`` and one ``RBM`` are created per entry of
        `hidden_layers_sizes`; each RBM is handed the ``W`` and ``b`` of its
        sigmoid layer, so both objects refer to the same weights. A
        ``LogisticRegression`` layer is then placed on the last sigmoid
        layer's output.

        :type np_rng: np.random.RandomState
        :param np_rng: np random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if a false value is
                           given, one is created from a seed drawn from
                           `np_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """
        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        theano_rng = theano_rng or MRG_RandomStreams(np_rng.randint(2**30))

        # Symbolic inputs: x is a matrix of rasterized images, y a vector of
        # integer labels.
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        # The network is built as a plain multilayer perceptron; each
        # sigmoid layer gets a companion RBM that uses the very same weight
        # matrix and hidden bias.
        fan_in = n_ins  # width of whatever feeds the next layer
        below = self.x  # symbolic output of the layer underneath
        for n_units in hidden_layers_sizes:
            hidden = HiddenLayer(rng=np_rng,
                                 input=below,
                                 n_in=fan_in,
                                 n_out=n_units,
                                 activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(hidden)

            # Only the sigmoid layers' parameters are registered as DBN
            # parameters; the RBMs' visible biases are deliberately left
            # out of this list.
            self.params.extend(hidden.params)

            self.rbm_layers.append(RBM(np_rng=np_rng,
                                       theano_rng=theano_rng,
                                       input=below,
                                       n_visible=fan_in,
                                       n_hidden=n_units,
                                       W=hidden.W,
                                       hbias=hidden.b))

            # The layer just built feeds the next one.
            fan_in = n_units
            below = hidden.output

        # Logistic-regression output layer on top of the last sigmoid layer.
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # Fine-tuning cost: negative log likelihood of the output layer.
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # Symbolic error expression of the output layer for (self.x, self.y).
        self.errors = self.logLayer.errors(self.y)
コード例 #9
0
ファイル: new_convnets.py プロジェクト: qq345736500/ironly
    def __init__(self, E, U, height, width, filter_hs, conv_non_linear,
                 hidden_units, batch_size, non_static, dropout_rates,
                 subspace_size=None, activations=[Iden]):
        """
        Build the convolutional front end and pass its features to the
        parent constructor.

        E = initial word-embedding matrix (stored in the shared variable Words)
        U = initial user-embedding matrix, or None to disable user embeddings
        height = sentence length (padded where necessary)
        width = word vector length (300 for word2vec)
        filter_hs = filter window sizes
        conv_non_linear = forwarded to every LeNetConvPoolLayer as non_linear
        hidden_units = [x,y] x is the number of feature maps (per filter window), and y is the penultimate layer
        batch_size = minibatch size used in the convolution image shape
        non_static = when true, the word vectors are added to the parameters
        dropout_rates = forwarded unchanged to the parent constructor
        subspace_size = when truthy and U is given, output width of the
                        HiddenLayer applied to the user embeddings
        activations = forwarded unchanged to the parent constructor
        """
        rng = np.random.RandomState(3435)
        n_maps = hidden_units[0]
        self.batch_size = batch_size

        # define model architecture
        self.index = T.lscalar()
        self.x = T.matrix('x')
        self.y = T.ivector('y')
        self.Words = theano.shared(value=E, name="Words")
        self.Users = None
        self.u = None
        self.subspace_size = subspace_size
        self.zero_vec = np.zeros(width)

        # set_zero(v) overwrites row 0 of the word-embedding matrix with v.
        row_value = T.vector()
        self.set_zero = theano.function(
            [row_value],
            updates=[(self.Words,
                      T.set_subtensor(self.Words[0, :], row_value))],
            allow_input_downcast=True)

        # Look up an embedding for every index in x and arrange the result
        # as a 4D tensor that all convolutional filters consume.
        word_ids = T.cast(self.x.flatten(), dtype="int32")
        layer0_input = self.Words[word_ids].reshape(
            (self.x.shape[0], 1, self.x.shape[1], self.Words.shape[1]))

        # One conv/pool layer per filter height; every filter spans the full
        # embedding width.
        image_shape = (batch_size, 1, height, width)
        kernel_w = width
        self.conv_layers = [
            LeNetConvPoolLayer(rng, input=layer0_input,
                               image_shape=image_shape,
                               filter_shape=(n_maps, 1, filter_h, kernel_w),
                               poolsize=(height - filter_h + 1,
                                         width - kernel_w + 1),
                               non_linear=conv_non_linear)
            for filter_h in filter_hs
        ]

        # inputs to the MLP: the flattened outputs of all conv layers
        layer1_input = T.concatenate(
            [layer.output.flatten(2) for layer in self.conv_layers], 1)
        conv_width = n_maps * len(filter_hs)

        if U is None:
            print("NO user embeddings")
            layer_sizes = [conv_width]
        else:
            print("Will use user embeddings")
            self.u = T.ivector('u')
            self.Users = theano.shared(value=U, name="Users")
            them_users = self.Users[self.u]
            if self.subspace_size:
                print("and subspace")
                # NOTE(review): self.subspace.params is not added to
                # self.params anywhere in this constructor -- confirm whether
                # the projection is meant to stay untrained.
                self.subspace = HiddenLayer(rng, them_users, U.shape[1],
                                            subspace_size, Sigmoid)
                # Debug hook exposing the subspace output and conv features.
                self.peep = theano.function(
                    [self.x, self.u],
                    [self.subspace.output, layer1_input],
                    allow_input_downcast=True)
                # NOTE(review): a sigmoid is applied on top of a layer that
                # was already built with the Sigmoid activation -- confirm
                # this is intended.
                layer1_input = T.concatenate(
                    (layer1_input, T.nnet.sigmoid(self.subspace.output)), 1)
                layer_sizes = [conv_width + subspace_size]
            else:
                # Append the raw user embedding to the conv features.
                layer1_input = T.concatenate((layer1_input, them_users), 1)
                layer_sizes = [conv_width + U.shape[1]]
        layer_sizes.extend(hidden_units[1:])

        super(ConvNet, self).__init__(rng, input=layer1_input,
                                      layer_sizes=layer_sizes,
                                      activations=activations,
                                      dropout_rates=dropout_rates)

        # add parameters from convolutional layers
        for conv_layer in self.conv_layers:
            self.params += conv_layer.params
        if non_static:
            # if word vectors are allowed to change, add them as model parameters
            self.params += [self.Words]
        if U is not None:
            # the user embeddings are registered whether or not a subspace
            # layer is used
            self.params += [self.Users]
コード例 #10
0
def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 50], batch_size=500):
    """Train a LeNet-style convolutional network and report its error rates.

    :type learning_rate: float
    :param learning_rate: learning rate (step size) used for gradient descent

    :type n_epochs: int
    :param n_epochs: maximum number of optimization epochs

    :type dataset: string
    :param dataset: name of the dataset, forwarded to ``load_data``

    :type nkerns: list of ints
    :param nkerns: number of kernels on each of the two conv/pool layers

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """

    # fixed seed for the weight initialisation
    rng = numpy.random.RandomState(23455)

    # load the data
    datasets = load_data(dataset)

    # the dataset comes as three (inputs, labels) pairs: train, valid, test
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Number of minibatches in each split. Floor division keeps the counts
    # integral, which xrange() and the modulo test below require.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # symbolic minibatch index
    index = T.lscalar()

    # x: image input, y: label output
    x = T.matrix('x')
    y = T.ivector('y')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape the input into a 4D tensor (batch_size, 1, 28, 28), where
    # 28 x 28 is the image size.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # First convolution + pooling layer:
    # filtering reduces the image to (28-5+1, 28-5+1) = (24, 24),
    # max-pooling reduces it further to (24/2, 24/2) = (12, 12),
    # so the 4D output has shape (batch_size, nkerns[0], 12, 12).
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # Second convolution + pooling layer:
    # filtering reduces the image to (12-5+1, 12-5+1) = (8, 8),
    # max-pooling reduces it further to (8/2, 8/2) = (4, 4),
    # so the 4D output has shape (batch_size, nkerns[1], 4, 4).
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # The hidden layer is fully connected; its input is a 2D matrix of
    # shape (batch_size, num_pixels).
    layer2_input = layer1.output.flatten(2)

    # fully-connected hidden layer with a tanh activation
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    # classify the output of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # cost minimised during training
    cost = layer3.negative_log_likelihood(y)

    # error of the model on one test minibatch
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # error of the model on one validation minibatch
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # all model parameters
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # gradient of the cost with respect to every parameter
    grads = T.grad(cost, params)

    # plain SGD update for every (parameter, gradient) pair
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    # one training step on one minibatch
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                # The double space reproduces the output of the former
                # two-argument print statement.
                print('training @ iter =  %i' % iteration)
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:

                # compute the validation loss
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if this is the best validation score so far
                if this_validation_loss < best_validation_loss:

                    # extend patience when the improvement is large enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    # remember the best validation loss and its iteration
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # evaluate on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
コード例 #11
0
ファイル: DBN.py プロジェクト: skye17/newfront
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=None, n_outs=(None, None),
                 continuous=False):
        """Stack sigmoid layers with weight-sharing RBMs and an MLP on top.

        One ``HiddenLayer`` plus one RBM is created per entry of
        `hidden_layers_sizes`; each RBM receives the ``W`` and ``b`` of its
        sigmoid layer. An ``MLP`` is then placed on the output of the last
        sigmoid layer.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if a false value is
                           given, one is created from a seed drawn from
                           `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value; None selects [500, 500]

        :type n_outs: tuple of ints
        :param n_outs: (n_hidden, n_out) of the top MLP; the default
                       (None, None) selects (10, 10)

        :type continuous: bool
        :param continuous: when true, the first layer is paired with a
                           ``CRBM`` instead of an ``RBM``
        """
        # Resolve the sentinel defaults.
        n_outs = (10, 10) if n_outs == (None, None) else n_outs
        if hidden_layers_sizes is None:
            hidden_layers_sizes = [500, 500]

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        theano_rng = theano_rng or MRG_RandomStreams(
            numpy_rng.randint(2 ** 30))

        # Symbolic inputs: x is a matrix of rasterized images, y a vector of
        # labels.
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        fan_in = n_ins  # width of whatever feeds the next layer
        below = self.x  # symbolic output of the layer underneath
        for depth, n_units in enumerate(hidden_layers_sizes):
            hidden = HiddenLayer(rng=numpy_rng,
                                 input=below,
                                 n_in=fan_in,
                                 n_out=n_units,
                                 activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(hidden)
            self.params.extend(hidden.params)

            # The companion RBM uses this layer's weights and hidden bias;
            # only the bottom layer may use the CRBM variant.
            rbm_class = CRBM if (continuous and depth == 0) else RBM
            self.rbm_layers.append(rbm_class(numpy_rng=numpy_rng,
                                             theano_rng=theano_rng,
                                             input=below,
                                             n_visible=fan_in,
                                             n_hidden=n_units,
                                             W=hidden.W,
                                             hbias=hidden.b))

            # The layer just built feeds the next one.
            fan_in = n_units
            below = hidden.output

        # Top of the network: an MLP on the last sigmoid layer's output.
        self.topLayer = MLP(
            rng=numpy_rng,
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_hidden=n_outs[0],
            n_out=n_outs[1])
        self.params.extend(self.topLayer.params)

        # Fine-tuning cost: negative log likelihood of the top layer.
        self.finetune_cost = self.topLayer.negative_log_likelihood(self.y)

        # Symbolic error expression of the top layer for (self.x, self.y).
        self.errors = self.topLayer.errors(self.y)
コード例 #12
0
def evaluate_lenet5(
    learning_rate=0.1,
    n_epochs=200,
    dataset="mnist.pkl.gz",
    nkerns=[20, 50],
    batch_size=500,
):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # display some chars:
    display_some(train_set_x, train_set_y.eval(), n=5, title="label=")

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix("x")  # the data is presented as rasterized images
    y = T.ivector("y")  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2),
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2),
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        [layer3.errors(y), layer3.y_pred],
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
        },
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
        },
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        inputs=[index],
        outputs=[cost, layer3.errors(y)],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
        },
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # for error_curve plot
    cost_train = []  # observe likelihood cost while training
    err_train = []  # observe train err while training
    err_valid = []  # observe valid err while training
    err_test = []  # observe test  err while training

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print("training @ iter = ", iter)
            train_outputs = train_model(minibatch_index)
            cost_ij = train_outputs[0]
            err_train.append(train_outputs[1])  # add error_train

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                err_valid.append(this_validation_loss)

                print("epoch %i, minibatch %i/%i, validation error %f %%" % (
                    epoch,
                    minibatch_index + 1,
                    n_train_batches,
                    this_validation_loss * 100.0,
                ))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)[0] for i in range(n_test_batches)
                    ]

                    test_score = numpy.mean(test_losses)

                    err_test.append(test_score)

                    print(("     epoch %i, minibatch %i/%i, test error of "
                           "best model %f %%") % (
                               epoch,
                               minibatch_index + 1,
                               n_train_batches,
                               test_score * 100.0,
                           ))
                    """
                    # save the best model
                    with open('../doc/data/best_model.pkl', 'wb') as f:
                        pickle.dump(layer0, layer1, layer2, layer3, f)
                    """

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print("Optimization complete.")
    print("Best validation score of %f %% obtained at iteration %i, "
          "with test performance %f %%" %
          (best_validation_loss * 100.0, best_iter + 1, test_score * 100.0))
    print(
        ("The code for file " + os.path.split(__file__)[1] + " ran for %.2fm" %
         ((end_time - start_time) / 60.0)),
        file=sys.stderr,
    )

    model = [layer0, layer1, layer2, layer3]
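    # save the model as it stands after training (NOTE: no snapshot is
    # taken at the best validation score, so this is the final state and
    # not necessarily the best-validation model)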
    with open("../doc/data/best_model.pkl", "wb") as f:
        pickle.dump(model, f)

    test_pred_y = test_model(0)[1]  # predict on first batch_size sampless

    # display some chars using predict
    display_some(test_set_x, test_pred_y, n=5, title="pred=")  # n < batch_size
    return err_train, err_valid, err_test