Example #1
 def addLayer(self,
              file_name=None,
              neurons=None,
              output=False,
              trainable=True):
     """Add a layer to the network
     """
     if len(self.layers) == 0:
         input_size = self.input_size
     else:
         input_size = self.layers[-1].num_neurons
     if file_name:
         if output:
             self.layers.append(
                 OutputLayer(weight_file=file_name, trainable=trainable))
         else:
             self.layers.append(
                 HiddenLayer(weight_file=file_name, trainable=trainable))
     else:
         if output:
             self.layers.append(
                 OutputLayer(num_neurons=neurons,
                             inputs=input_size,
                             trainable=trainable))
         else:
             self.layers.append(
                 HiddenLayer(num_neurons=neurons,
                             inputs=input_size,
                             trainable=trainable))
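A small self-contained sketch of the sizing rule addLayer applies: the first layer is sized from the network's input_size, every later layer from the previous layer's neuron count. The values below are illustrative only; the real method additionally dispatches between HiddenLayer and OutputLayer and can load weights from a file.

# Illustrative sketch of the input-size chaining used by addLayer above;
# plain Python, no framework dependencies.
input_size = 4
layer_sizes = []                 # stands in for self.layers
for neurons in (8, 8, 3):        # two hidden layers, then an output layer
    fan_in = input_size if not layer_sizes else layer_sizes[-1]
    print('new layer: %d inputs, %d neurons' % (fan_in, neurons))
    layer_sizes.append(neurons)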
Example #2
 def init_params(self):
     self.transform_hidden = HiddenLayer(input_size=self.input_hidden_size,
                                         hidden_size=5 + self.use_dx_dy,
                                         activation=act.Identity,
                                         name='Writer.Params.' + self.name)
     self.w_transform = HiddenLayer(input_size=self.input_hidden_size,
                                    hidden_size=self.channels * self.N *
                                    self.N,
                                    activation=act.Identity,
                                    name='Writer.Write.' + self.name)
Example #3
    def init(self, layers_data):
        self.layers_data = layers_data
        for i in range(1, len(layers_data) - 1):
            self.hiddenLayers.append(HiddenLayer())
            self.hiddenLayers[-1].init(layers_data[i - 1] + 1, layers_data[i])

        self.outputLayer.init(layers_data[-2] + 1, layers_data[-1])
        """self.hiddenLayers[0].neurons[0].weights = [0.5, 0.4, -0.8]
		self.hiddenLayers[0].neurons[1].weights = [0.9, 1.0, 0.1]
		self.outputLayer.neurons[0].weights = [-1.2, 1.1, -0.3]"""

        self.outputs = [0 for i in range(layers_data[-1])]
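A worked example of the sizing logic in init above; reading the + 1 as an extra bias input is an assumption, not stated in the source.

# layers_data = [2, 3, 1]: one hidden layer with 2 + 1 = 3 inputs and
# 3 neurons, and an output layer with 3 + 1 = 4 inputs and 1 neuron.
layers_data = [2, 3, 1]
for i in range(1, len(layers_data) - 1):
    print('hidden layer %d: %d inputs, %d neurons'
          % (i, layers_data[i - 1] + 1, layers_data[i]))
print('output layer: %d inputs, %d neurons'
      % (layers_data[-2] + 1, layers_data[-1]))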
Example #4
    def __init__(self, batch_size, nkerns=[20, 50]):
        """
        """
        super(ConvolutionalMultilayerPerceptronClassifier, self).__init__()

        self.batch_size = batch_size
        rng = numpy.random.RandomState(23455)

        # Reshape matrix of rasterized images of shape (self.batch_size,28*28)
        # to a 4D tensor, compatible with our PoolingLayer

        # Construct the first convolutional pooling layer:
        # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
        # maxpooling reduces this further to (24/2,24/2) = (12,12)
        # 4D output tensor is thus of shape (self.batch_size,nkerns[0],12,12)
        self.layer0 = PoolingLayer(rng,
                                   image_shape=(self.batch_size, 1, 28, 28),
                                   filter_shape=(nkerns[0], 1, 5, 5),
                                   poolsize=(2, 2))

        # Construct the second convolutional pooling layer
        # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
        # maxpooling reduces this further to (8/2,8/2) = (4,4)
        # 4D output tensor is thus of shape (self.batch_size,nkerns[1],4,4)
        self.layer1 = PoolingLayer(rng,
                                   image_shape=(self.batch_size, nkerns[0], 12,
                                                12),
                                   filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                   poolsize=(2, 2))

        # the HiddenLayer being fully-connected, it operates on 2D matrices of
        # shape (self.batch_size,num_pixels) (i.e. a matrix of rasterized images).
        # This will generate a matrix of shape (self.batch_size,nkerns[1]*4*4) = (self.batch_size,800)

        # construct a fully-connected sigmoidal layer
        self.layer2 = HiddenLayer(rng,
                                  input_units=nkerns[1] * 4 * 4,
                                  output_units=500,
                                  nonlinear_function=Tensor.tanh)

        # classify the values of the fully-connected sigmoidal layer
        self.layer3 = LogisticClassifier(input_units=500, output_units=10)

        # create a list of all model parameters to be fit by gradient descent
        self.parameters = (self.layer3.parameters + self.layer2.parameters +
                           self.layer1.parameters + self.layer0.parameters)
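The shape comments above rely on 'valid' convolution followed by non-overlapping max-pooling; a tiny self-contained helper that reproduces that arithmetic (the helper name is ours):

# (image - filter + 1) // pool, applied once per PoolingLayer stage.
def conv_pool_output_size(image_size, filter_size, pool_size):
    return (image_size - filter_size + 1) // pool_size

size = 28
for filter_size, pool_size in ((5, 2), (5, 2)):
    size = conv_pool_output_size(size, filter_size, pool_size)
    print('feature map size after this stage: %dx%d' % (size, size))
# prints 12x12, then 4x4 -- matching (28-5+1)/2 and (12-5+1)/2 above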
Example #5
    def __init__(self, rng, n_in, n_hidden, n_out, L1_reg=0.00, L2_reg=0.0001):
        """
        """
        super(MultilayerPerceptronClassifier, self).__init__()

        self.hiddenLayer = HiddenLayer(rng=rng,
                                       input_units=n_in,
                                       output_units=n_hidden,
                                       nonlinear_function=Tensor.tanh)

        self.logRegressionLayer = LogisticClassifier(input_units=n_hidden,
                                                     output_units=n_out)

        self.initialize_l1(L1_reg)
        self.initialize_l2(L2_reg)

        self.parameters = (self.hiddenLayer.parameters +
                           self.logRegressionLayer.parameters)
Example #6
    def __init__(self, rng, input, n_in, n_hidden, n_out):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie

        """

        # Since we are dealing with a one hidden layer MLP, this will translate
        # into a HiddenLayer with a tanh activation function connected to the
        # LogisticRegression layer; the activation function can be replaced by
        # sigmoid or any other nonlinear function
        self.hiddenLayer = HiddenLayer(
            rng=rng,
            input=input,
            n_in=n_in,
            n_out=n_hidden,
            activation=T.tanh
        )

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=n_hidden,
            n_out=n_out
        )

        # Enforce L1 norm to be small
        self.L1 = (
            abs(self.hiddenLayer.W).sum()
            + abs(self.logRegressionLayer.W).sum()
        )

        # Enforce square of L2 norm to be small
        self.L2_sqr = (
            (self.hiddenLayer.W ** 2).sum()
            + (self.logRegressionLayer.W ** 2).sum()
        )

        # negative log likelihood of MLP is negative log likelihood of model
        # which is NLL of LR layer
        self.negative_log_likelihood = (
            self.logRegressionLayer.negative_log_likelihood
        )

        self.errors = self.logRegressionLayer.errors

        self.params = self.hiddenLayer.params + self.logRegressionLayer.params

        self.input = input
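This class follows the Theano MLP tutorial, where the L1, L2_sqr and negative_log_likelihood attributes defined above are typically combined into a regularized training cost roughly as sketched below; the helper name and default weights are assumptions.

def mlp_training_cost(classifier, y, L1_reg=0.00, L2_reg=0.0001):
    # NLL of the labels plus weighted L1 / squared-L2 penalties,
    # built only from the attributes the class above exposes.
    return (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)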
Example #7
def evaluate_model(learning_rate=0.001,
                   n_epochs=100,
                   nkerns=[16, 40, 50, 60],
                   batch_size=20):
    """ 
    Network for classification of MNIST database

    :type learning_rate: float
    :param learning_rate: this is the initial learning rate used
                            (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: the batch size for training
    """

    print("Evaluating model")

    rng = numpy.random.RandomState(23455)

    # loading the data
    datasets = load_test_data()

    valid_set_x, valid_set_y = datasets[0]
    test_set_x, test_set_y = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    loaded_params = numpy.load('../saved_models/model.npy')
    layer4_W, layer4_b, layer3_W, layer3_b, layer2_W, layer2_b, layer1_W, layer1_b, layer0_W, layer0_b = loaded_params

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('Building the model...')

    chosen_height = 64
    chosen_width = 64

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 64 * 64)
    # to a 4D tensor, compatible with our MyConvPoolLayer
    # (64, 64) is the chosen height and width of the input images.
    layer0_input = x.reshape((batch_size, 3, chosen_height, chosen_width))

    # Construct the first convolutional pooling layer:
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (64/2, 64/2) = (32, 32)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 32, 32)
    layer0 = MyConvPoolLayer(rng,
                             input=layer0_input,
                             image_shape=(batch_size, 3, chosen_height,
                                          chosen_width),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[0], 3, 5, 5),
                             poolsize=(2, 2))

    # Construct the second convolutional pooling layer:
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (32/2, 32/2) = (16, 16)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 16)
    layer1 = MyConvPoolLayer(rng,
                             input=layer0.output,
                             image_shape=(batch_size, nkerns[0],
                                          chosen_height // 2, chosen_width // 2),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[1], nkerns[0], 5, 5),
                             poolsize=(2, 2))

    # Construct the third convolutional pooling layer
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (16/2, 16/2) = (8, 8)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 8, 8)
    layer2 = MyConvPoolLayer(rng,
                             input=layer1.output,
                             image_shape=(batch_size, nkerns[1],
                                          chosen_height // 4, chosen_width // 4),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[2], nkerns[1], 5, 5),
                             poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 8 * 8),
    # or (20, 50 * 8 * 8) = (20, 3200) with the default values.
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * (chosen_height // 8) *
                         (chosen_width // 8),
                         n_out=800,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=800, n_out=6)

    cost = layer4.negative_log_likelihood(y)

    predicted_output = layer4.y_pred

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    #Loading the model
    # f = file('../saved_models/model317.save.npy', 'r')
    # params = cPickle.load(f)
    # print(params)
    # f.close()
    # # layer4.params, layer3.params, layer2.params, layer1.params, layer0.params = params
    # # layer4.W, layer4.b = layer4.params
    # # layer3.W, layer3.b = layer3.params
    # # layer2.W, layer2.b = layer2.params
    # # layer1.W, layer1.b = layer1.params
    # # layer0.W, layer0.b = layer0.params
    # layer4.W, layer4.b, layer3.W, layer3.b, layer2.W, layer2.b, layer1.W, layer1.b, layer0.W, layer0.b = params
    # layer4.params = [layer4.W, layer4.b]
    # layer3.params = [layer3.W, layer3.b]
    # layer2.params = [layer2.W, layer2.b]
    # layer1.params = [layer1.W, layer1.b]
    # layer0.params = [layer0.W, layer0.b]

    # x = cPickle.load(f)
    # layer4.params = [layer4.W, layer4.b]
    # layer3.params = [layer3.W, layer3.b]
    # layer2.params = [layer2.W, layer2.b]
    # layer1.params = [layer1.W, layer1.b]
    # layer0.params = [layer0.W, layer0.b]

    # test it on the test set
    test_losses = [test_model(i) for i in range(n_test_batches)]
    validation_losses = [validate_model(i) for i in range(n_valid_batches)]

    test_score = numpy.mean(test_losses)
    validation_score = numpy.mean(validation_losses)
    print((' Validation error is %f %%') % (validation_score * 100.))
    print((' Test error is %f %%') % (test_score * 100.))
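Note that loaded_params is unpacked at the top of evaluate_model but never pushed into the layers (the assignment block is commented out), so the evaluation above appears to run with freshly initialized weights. A hedged helper sketch for copying the loaded arrays into the layers, assuming each layer exposes W and b as Theano shared variables, as the commented-out block implies:

def load_layer_params(layers, param_pairs):
    # e.g. load_layer_params([layer4, layer3, layer2, layer1, layer0],
    #                        [(layer4_W, layer4_b), ..., (layer0_W, layer0_b)])
    for layer, (W_val, b_val) in zip(layers, param_pairs):
        layer.W.set_value(W_val)
        layer.b.set_value(b_val)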
Example #8
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10):
        """
        """
        super(DBN, self).__init__()

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.parameters = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in range(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input_units=input_size,
                                        output_units=hidden_layers_sizes[i],
                                        nonlinear_function=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.parameters.extend(sigmoid_layer.parameters)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RestrictedBoltzmannMachine(
                numpy_rng=numpy_rng,
                theano_rng=theano_rng,
                n_visible=input_size,
                n_hidden=hidden_layers_sizes[i],
                W=sigmoid_layer.weights,
                hbias=sigmoid_layer.biases)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticClassifier(input_units=hidden_layers_sizes[-1],
                                           output_units=n_outs)
        self.parameters.extend(self.logLayer.parameters)
Example #9
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10,
                 corruption_levels=[0.1, 0.1]):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        self.theano_rng = theano_rng
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
        # [int] labels

        for i in range(self.n_layers):
            # n sigmoid layers and n dA layers

            # size of input is either hidden units of layer below, or input size for first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below, or the input to the SdA if this is
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            self.sigmoid_layers.append(sigmoid_layer)

            self.params.extend(sigmoid_layer.params)

            dA_layer = DenoisingAutoEncoder(numpy_rng=numpy_rng,
                                            theano_rng=theano_rng,
                                            input=layer_input,
                                            n_visible=input_size,
                                            n_hidden=hidden_layers_sizes[i],
                                            W=sigmoid_layer.W,
                                            bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)

        self.params.extend(self.logLayer.params)

        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        self.errors = self.logLayer.errors(self.y)
Example #10
    def stack(self):
        if self.__stacked == 0:
            self.layers = []
            self.rbm_layers = []
            self.downpass_layers = []

            for i in range(self.n_layers):
                # let the output of each layer become the input of
                # the next layer.
                if i == 0:
                    input = self.input
                    n_visible = self.n_visible
                else:
                    input = self.layers[-1].output
                    n_visible = self.n_hidden[i - 1]

                n_hidden = self.n_hidden[i]
                self.rbm_layers.append(
                    ParallelTempering(input=input,
                                      n_visible=n_visible,
                                      n_hidden=n_hidden))
                self.layers.append(
                    HiddenLayer(input=input,
                                n_in=n_visible,
                                n_out=n_hidden,
                                W=self.rbm_layers[i].W,
                                vbias=self.rbm_layers[i].vbias,
                                hbias=self.rbm_layers[i].hbias))
                print "\rFowardProbagation Layer: %i Done..." % (i)

            # building the downpass network.
            for i in reversed(range(self.n_layers)):
                if i == (self.n_layers - 1):
                    # the last layer is the same as in the feedforward pass
                    self.downpass_layers.append(self.layers[i])
                elif i == (self.n_layers - 2):
                    # Change the layer
                    # The layer configuration is a flipped hidden layer object
                    # Take the output from the last layer (except for the
                    # last 2 layers.)
                    self.downpass_layers.append(
                        HiddenLayer(
                            input=self.downpass_layers[-1].feedbackward,
                            n_in=self.downpass_layers[-1].n_visible,
                            n_out=self.rbm_layers[i].n_visible,
                            W=self.rbm_layers[i].W.T,
                            vbias=self.rbm_layers[i].hbias,
                            hbias=self.rbm_layers[i].vbias))
                else:
                    self.downpass_layers.append(
                        HiddenLayer(input=self.downpass_layers[-1].output,
                                    n_in=self.downpass_layers[-1].n_hidden,
                                    n_out=self.rbm_layers[i].n_visible,
                                    W=self.rbm_layers[i].W.T,
                                    vbias=self.rbm_layers[i].hbias,
                                    hbias=self.rbm_layers[i].vbias))
                print('BackPropagation Layer: %i Done...' % i)
            print('Stacking Done')
            self.__stacked = 1
        else:
            print "The Network has been builded"
        return self
Example #11
    def __init__(self,
                 corpus,
                 n_emb,
                 n_hidden,
                 batch_size,
                 conv_size,
                 pooling,
                 rng=None,
                 th_rng=None,
                 load_from=None,
                 gensim_w2v=None):
        '''
        n_hidden: output conv stack size
        conv_size: filter height size
        '''
        self.corpus = corpus
        self.n_emb = n_emb
        self.n_hidden = n_hidden
        self.batch_size = batch_size
        self.conv_size = conv_size
        self.pooling = pooling
        assert pooling in ('mean', 'max')

        if rng is None:
            rng = np.random.RandomState(1226)
        if th_rng is None:
            th_rng = RandomStreams(1226)

        # x/mask: (batch size, nsteps)
        x = T.matrix('x', dtype='int32')
        mask = T.matrix('mask', dtype=theano.config.floatX)
        y = T.vector('y', dtype='int32')
        batch_idx_seq = T.vector('index', dtype='int32')
        use_noise = theano.shared(th_floatX(0.))
        self.x, self.mask, self.y, self.batch_idx_seq, self.use_noise = x, mask, y, batch_idx_seq, use_noise

        # No need for transpose of x/mask in CNN
        n_samples, n_steps = x.shape
        # transpose mask-matrix to be consistent with pooling-layer-inputs
        trans_mask = mask.T
        # truncate mask-matrix to be consistent with conv-outputs
        trunc_mask = trans_mask[(conv_size - 1):]

        # list of model layers
        model_layers = []
        model_layers.append(
            EmbLayer(x,
                     load_from=load_from,
                     rand_init_params=(rng, (corpus.dic.size, n_emb)),
                     gensim_w2v=gensim_w2v,
                     dic=corpus.dic))
        # emb-out: (batch size, n_words/steps, emb_dim)
        # conv-in: (batch size, 1(input stack size), n_words/steps, emb_dim)
        # conv-out: (batch size, n_hidden(output stack size), output feature map height, 1(output feature map width))
        # pooling-in: (output feature map height, batch size, output stack size)
        conv_in = model_layers[-1].outputs[:, None, :, :]
        model_layers.append(
            ConvLayer(conv_in,
                      image_shape=(batch_size, 1, corpus.maxlen, n_emb),
                      load_from=load_from,
                      rand_init_params=(rng, (n_hidden, 1, conv_size, n_emb))))
        pooling_in = T.transpose(model_layers[-1].outputs.flatten(3),
                                 axes=(2, 0, 1))
        if pooling == 'mean':
            model_layers.append(MeanPoolingLayer(pooling_in, trunc_mask))
        else:
            model_layers.append(MaxPoolingLayer(pooling_in, trunc_mask))
        model_layers.append(
            DropOutLayer(model_layers[-1].outputs, use_noise, th_rng))
        model_layers.append(
            HiddenLayer(model_layers[-1].outputs,
                        activation=T.nnet.softmax,
                        load_from=load_from,
                        rand_init_params=(rng, (n_hidden, corpus.n_type))))
        self.model_layers = model_layers

        model_params = []
        for layer in model_layers:
            model_params += layer.params

        self.pred_prob = model_layers[-1].outputs
        self.pred = T.argmax(self.pred_prob, axis=1)
        off = 1e-8
        self.cost = -T.mean(
            T.log(self.pred_prob[T.arange(n_samples), y] + off))

        # attributes with the `func` suffix are compiled functions
        self.predict_func = theano.function(inputs=[x, mask],
                                            outputs=self.pred)
        self.predict_prob_func = theano.function(inputs=[x, mask],
                                                 outputs=self.pred_prob)

        grads = T.grad(self.cost, model_params)
        self.gr_updates, self.gr_sqr_updates, self.dp_sqr_updates, self.param_updates = ada_updates(
            model_params, grads)
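A hedged usage sketch for the compiled prediction functions mentioned above; clf is an assumed, already-trained instance of this class, and the dummy batch mirrors the declared dtypes (int32 word indices, floatX mask) and the fixed batch size.

import numpy as np
import theano

# Hypothetical call; clf, x_batch and mask_batch are not names from the
# example above.
x_batch = np.zeros((clf.batch_size, clf.corpus.maxlen), dtype='int32')
mask_batch = np.ones((clf.batch_size, clf.corpus.maxlen),
                     dtype=theano.config.floatX)
labels = clf.predict_func(x_batch, mask_batch)       # argmax class per sample
probs = clf.predict_prob_func(x_batch, mask_batch)   # softmax probabilities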
Example #12
from neural_network import NeuralNetwork
from input_layer import InputLayer
from output_layer import OutputLayer
from hidden_layer import HiddenLayer

# train the network on the XOR function and test its predictions
model = NeuralNetwork(InputLayer(2), HiddenLayer(2, "sigmoid"),
                      OutputLayer(1, "sigmoid"))
train_input = [[1, 1], [1, 0], [0, 1], [0, 0]]
train_output = [0, 1, 1, 0]
model.train(train_input, train_output, 1, 0.1, 20)
model.predict([1, 1])
model.predict([1, 0])
model.predict([0, 1])
model.predict([0, 0])
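A small follow-up check that prints each prediction next to the expected XOR target; it only assumes that predict returns the network's output for a single input sample, as the calls above suggest.

for sample, expected in zip(train_input, train_output):
    print(sample, '->', model.predict(sample), '(expected %d)' % expected)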
Example #13
def evaluate_cifar(learning_rate=0.001,
                   n_epochs=100,
                   dataset_folder='cifar-10-batches-py',
                   nkerns=[16, 20, 20],
                   batch_size=32):
    """ 
    Network for classification of MNIST database

    :type learning_rate: float
    :param learning_rate: this is the initial learning rate used
                            (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset_folder: string
    :param dataset_folder: the folder containing the batch files for cifar

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: the batch size for training
    """

    rng = numpy.random.RandomState(23455)

    # loading the cifar data
    datasets = load_cifar_data(dataset_folder)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('Building the model...')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our MyConvPoolLayer
    # (32, 32) is the size of CIFAR-10 images.
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer:
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (32/2, 32/2) = (16, 16)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 16, 16)
    layer0 = MyConvPoolLayer(rng,
                             input=layer0_input,
                             image_shape=(batch_size, 3, 32, 32),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[0], 3, 5, 5),
                             poolsize=(2, 2))

    # Construct the second convolutional pooling layer:
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (16/2, 16/2) = (8, 8)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 8, 8)
    layer1 = MyConvPoolLayer(rng,
                             input=layer0.output,
                             image_shape=(batch_size, nkerns[0], 16, 16),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[1], nkerns[0], 5, 5),
                             poolsize=(2, 2))

    # Construct the third convolutional pooling layer
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4)
    layer2 = MyConvPoolLayer(rng,
                             input=layer1.output,
                             image_shape=(batch_size, nkerns[1], 8, 8),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[2], nkerns[1], 5, 5),
                             poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4),
    # or (32, 20 * 4 * 4) = (32, 320) with the default values.
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=5)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    predicted_output = layer4.y_pred

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # the learning rate for batch SGD (adaptive learning rate)
    l_rate = T.scalar('l_rate', dtype=theano.config.floatX)
    # the momentum SGD
    momentum = T.scalar('momentum', dtype=theano.config.floatX)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = []
    for param, grad in zip(params, grads):
        previous_step = theano.shared(param.get_value() * 0.,
                                      broadcastable=param.broadcastable)
        step = momentum * previous_step - l_rate * grad
        updates.append((previous_step, step))
        updates.append((param, param + step))

    train_model = theano.function(
        [index, l_rate, momentum],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('Training...')
    # early-stopping parameters
    patience = 50000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # initializing the adaptive learning rate
    adaptive_learning_rate = learning_rate
    # initializing the momentum
    momentum = 0.9

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        if epoch % 10 == 0:
            # decreasing the learning rate after every 10 epochs
            adaptive_learning_rate = 0.95 * adaptive_learning_rate
            # increasing the momentum after every 10 epochs
            momentum = 1.05 * momentum

        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index, adaptive_learning_rate,
                                  momentum)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # increase the learning rate by small amount (adaptive)
                    adaptive_learning_rate = 1.01 * adaptive_learning_rate

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

                else:
                    # decrease the learning rate by small amount (adaptive)
                    adaptive_learning_rate = 0.5 * adaptive_learning_rate

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' %
         ((end_time - start_time) / 60.)),
        file=sys.stderr)
Example #14
    def __init__(self, data):
        """Builds the network.

        Args:
            data: An instance of the class Dataset. The constructor actually
                only needs to know the input and output number of neurons from
                the dataset. However, we directly pass the whole Dataset
                instance, such that we don't have to pass it anymore in future
                function calls.

        Returns:
        """
        self._data = data

        # Determine number of hidden layers.
        num_hlayers = config.num_hidden_layers

        if isinstance(config.hidden_layer_sizes, list):
            num_hlayers = len(config.hidden_layer_sizes)

        # Check if configurations are consistent.
        if isinstance(config.lateral_inhibition_window, list) \
           and len(config.lateral_inhibition_window) != num_hlayers + 1:
            raise ConfigException('The length of the option list ' \
                                  + '\'lateral_inhibition_window\' does not' \
                                  + ' match the number of layers specified.')

        # Determine size of each layer.
        self._layer_sizes = []

        self._layer_sizes.append(data.input_size)
        if isinstance(config.hidden_layer_sizes, list):
            self._layer_sizes.extend(config.hidden_layer_sizes)
        else:
            self._layer_sizes.extend([config.hidden_layer_sizes] * num_hlayers)

        # Output layer size. The output layer has either as size the number of
        # classes or the user-defined size (if specified).
        if config.output_size is None:
            self._layer_sizes.append(data.output_size)
        else:
            self._layer_sizes.append(config.output_size)

        # Check and prepare equations
        equation_preparation.prepare_equations(num_hlayers+1)

        # To compute firing rates of neurons, we have to store the spike counts
        # of neurons before the input rates have been changed (see above
        # comment). The input layer can be omitted.
        self._exc_prev_spike_count = []
        self._inh_prev_spike_count = [None]
        for i in range(len(self._layer_sizes)):
            self._exc_prev_spike_count.append(np.zeros(self._layer_sizes[i],
                                                       dtype=np.int64))
            if i > 0:
                self._inh_prev_spike_count.append( \
                    np.zeros(self._layer_sizes[i], dtype=np.int64))

        # In order to compute firing rates, we need to know the time difference
        # between the current time and a reference time.
        self._prev_simulation_time = np.float32(b2.defaultclock.t_)
        assert(self._prev_simulation_time == 0)

        # Layer-wise SpikeMonitors for exc. neurons.
        self._exc_spike_monitors = []
        # Layer-wise SpikeMonitors for inh. neurons (None for input layer).
        self._inh_spike_monitors = []
        # Excitatory NeuronGroup of each layer.
        self._exc_layer = []
        # Inhibitory NeuronGroup of each layer (will be None for input layer).
        self._inh_layer = []
        # Feed-forward connections from excitatory neurons of one layer to the
        # next one (fully-connected).
        self._ee_synapses = []
        # Excitatory to inhibitory connections within layer.
        self._ei_synapses = []
        # Inhibitory to excitatory connections within layer.
        self._ie_synapses = []

        ### Input Layer
        # The input of the network will be a Poisson Layer.
        self._input_group = b2.NeuronGroup(self._layer_sizes[0], 'rates : Hz',
                                           threshold='rand()<rates*dt',
                                           name='neurons_poisson_0')
        self._exc_layer.append(self._input_group)
        exc_sm_args, _ = Recordings.get_spike_monitor_args(0)
        self._exc_spike_monitors.append(b2.SpikeMonitor(self._input_group, \
            variables=exc_sm_args[0], record=exc_sm_args[1]))
        self._inh_layer.append(None)
        self._inh_spike_monitors.append(None)

        # There are no recurrent connections within the input layer.
        self._ei_synapses.append(None)
        self._ie_synapses.append(None)

        ### Hidden Layer + Output Layer
        # We can spawn a separate thread to set up each layer, as the setup
        # can be done independently.
        threads = []

        for i in range(num_hlayers + 1):
            if isinstance(config.lateral_inhibition_window, list):
                k = config.lateral_inhibition_window[i]
            else:
                k = config.lateral_inhibition_window

            threads.append(HiddenLayer(self._layer_sizes[i+1], i+1,  k,
                                       str(i+1)))

        if config.num_threads > 1:
            logger.warning('Multithreading during Network Initialization' + \
                           ' has been disabled due to known issues.')
        thread_chunks = utils.yield_chunks(threads, 1)
        #thread_chunks = utils.yield_chunks(threads, config.num_threads)

        for tc in thread_chunks:
            logger.debug('Starting threads to create %d layer/s in parallel.' \
                         % (len(tc)))
            for thread in tc:
                thread.start()

            for thread in tc:
                thread.join()

                exn = thread.exc_neurons
                inn = thread.inh_neurons
                eis = thread.ei_synapses
                ies = thread.ie_synapses

                l = len(self._exc_spike_monitors)
                exc_sm_args, inh_sm_args = Recordings.get_spike_monitor_args(l)

                self._exc_layer.append(exn)
                self._exc_spike_monitors.append(b2.SpikeMonitor(exn, \
                    variables=exc_sm_args[0], record=exc_sm_args[1]))
                self._inh_layer.append(inn)
                self._inh_spike_monitors.append(b2.SpikeMonitor(inn, \
                    variables=inh_sm_args[0], record=inh_sm_args[1]))

                self._ei_synapses.append(eis)
                self._ie_synapses.append(ies)

        ### Connect layers.
        for i in range(self.num_layers - 1):
            # Connect excitatory neurons of layer i with those of layer i+1.
            eq = config._equation_module
            ees = bw.synapses(self._exc_layer[i], self._exc_layer[i+1],
                              eq.ee_model[i], eq.ee_method[i], eq.ee_on_pre[i],
                              eq.ee_on_post[i], eq.ee_delay[i],
                              eq.ee_namespace[i],
                              eq.ee_initialization[i],
                              name='synapses_ee_'+str(i+1),
                              connections=None, # Fully-connected
                              layer=i+1)
            self._ee_synapses.append(ees)

        ### Create the Brian simulation control center (Network)
        self._network = b2.Network()
        # Add all components to the network.
        self._network.add(self._exc_layer)
        self._network.add(self._inh_layer[1:])
        self._network.add(self._exc_spike_monitors)
        self._network.add(self._inh_spike_monitors[1:])
        self._network.add(self._ee_synapses)
        self._network.add(self._ei_synapses[1:])
        self._network.add(self._ie_synapses[1:])
        # Double-check correctness if one changes the code!
        #print(self._network.objects)

        # FIXME delete assertions
        assert(len(self._exc_layer) == self.num_layers)
        assert(len(self._inh_layer) == self.num_layers)
        assert(len(self._exc_spike_monitors) == self.num_layers)
        assert(len(self._inh_spike_monitors) == self.num_layers)
        assert(len(self._ei_synapses) == self.num_layers)
        assert(len(self._ie_synapses) == self.num_layers)

        self._eq_state = EqStateVars()
        self._eq_state.register(self)
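The constructor above stores per-layer spike counts and a reference simulation time precisely so that firing rates can be computed later; a hedged sketch of that computation, using only attributes defined above (the method name _firing_rates is an assumption):

import numpy as np
import brian2 as b2

def _firing_rates(self, layer):
    # Excitatory firing rates (spikes per second) of one layer, from the
    # spike-count delta since the stored reference time.
    counts = np.asarray(self._exc_spike_monitors[layer].count)
    elapsed = np.float32(b2.defaultclock.t_) - self._prev_simulation_time
    return (counts - self._exc_prev_spike_count[layer]) / elapsed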
Example #15
File: cnn.py  Project: jaidevd/mlp
def main():
    rng = np.random.RandomState(23455)
    datasets = load_data()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    batch_size = 500
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    nkerns = [20, 50]

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    layer0_input = x.reshape((batch_size, 1, 28, 28))

    layer0 = LeNetConvPoolLayer(rng, layer0_input,
            filter_shape=(nkerns[0], 1, 5, 5),
            image_shape=(batch_size, 1, 28, 28), poolsize=(2, 2))

    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            filter_shape=(nkerns[1], nkerns[0], 5, 5),
            image_shape=(batch_size, nkerns[0], 12, 12), poolsize=(2, 2))

    layer2_input = layer1.output.flatten(2)
    layer2 = HiddenLayer(rng, layer2_input, n_in=nkerns[1] * 4 * 4,
            n_out=500)

    layer3 = LogisticRegression(layer2.output, n_in=500, n_out=10)
    cost = layer3.negative_log_likelihood(y)

    test_model = theano.function([index], layer3.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]
            })
    validate_model = theano.function([index], layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]
            })
    params = layer3.params + layer2.params + layer1.params + layer0.params
    grads = T.grad(cost, params)

    learning_rate = 0.1

    updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i
            in zip(params, grads)]
    train_model = theano.function([index], cost, updates=updates,
            givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size]
            })

    print "Start training..."
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    n_epochs = 200
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    test_score = 0.

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)  # NOQA

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break
Example #16
def generate_segmented_image_tensors(img_by_1, img_by_2, img_by_4, model_dir,
                                     batch_size, height, width,
                                     num_of_classes):
    layer_0_W = numpy.load(model_dir + '/params/layer_0_W.npy')
    layer_0_b = numpy.load(model_dir + '/params/layer_0_b.npy')
    layer_1_W = numpy.load(model_dir + '/params/layer_1_W.npy')
    layer_1_b = numpy.load(model_dir + '/params/layer_1_b.npy')
    layer_2_W = numpy.load(model_dir + '/params/layer_2_W.npy')
    layer_2_b = numpy.load(model_dir + '/params/layer_2_b.npy')
    layer_3_W = numpy.load(model_dir + '/params/layer_3_W.npy')
    layer_3_b = numpy.load(model_dir + '/params/layer_3_b.npy')
    layer_4_W = numpy.load(model_dir + '/params/layer_4_W.npy')
    layer_4_b = numpy.load(model_dir + '/params/layer_4_b.npy')
    num_kernels = numpy.load(model_dir + '/params/filer_kernels.npy')
    filter_size = numpy.load(model_dir + '/params/filter_size.npy')

    if model_dir[11] == 'M':
        is_multi_scale = True
    elif model_dir[11] == 'S':
        is_multi_scale = False
    else:
        return NotImplemented

    if model_dir[13] == 'H':
        use_hidden_layer = True
    elif model_dir[13] == 'L':
        use_hidden_layer = False
    else:
        return NotImplemented

    if model_dir[15] == 'I':
        use_interpolation = True
    elif model_dir[15] == 'N':
        use_interpolation = False
    else:
        return NotImplemented

    layer0 = CNN_Layer(
        name='Layer_0',
        W=layer_0_W,
        b=layer_0_b,
        filter_shape=(num_kernels[0], 3, filter_size, filter_size),
    )

    layer1 = CNN_Layer(
        name='Layer_1',
        W=layer_1_W,
        b=layer_1_b,
        filter_shape=(num_kernels[1], num_kernels[0], filter_size,
                      filter_size),
    )

    layer2 = CNN_Layer(
        name='Layer_2',
        W=layer_2_W,
        b=layer_2_b,
        filter_shape=(num_kernels[2], num_kernels[1], filter_size,
                      filter_size),
    )

    layer3 = HiddenLayer(name='Layer_3',
                         W=layer_3_W,
                         b=layer_3_b,
                         n_in=(num_kernels[2] * 3 if is_multi_scale
                               else num_kernels[2]),
                         n_out=(num_kernels[2] * 4 if is_multi_scale
                                else num_kernels[2] * 2),
                         activation=theano.tensor.tanh)

    layer4 = LogisticRegression(
        name='Layer_4',
        W=layer_4_W,
        b=layer_4_b,
        n_in=(num_kernels[2] * 4 if is_multi_scale
              else num_kernels[2] * 2),
        n_out=num_of_classes,
    )

    x_by_1 = theano.tensor.ftensor4('x_by_1')
    x_by_2 = theano.tensor.ftensor4('x_by_2')
    x_by_4 = theano.tensor.ftensor4('x_by_4')

    forward_propagation(
        layer0=layer0,
        layer1=layer1,
        layer2=layer2,
        layer3=layer3,
        layer4=layer4,
        x_by_1=x_by_1,
        x_by_2=x_by_2,
        x_by_4=x_by_4,
        num_kernels=num_kernels,
        batch_size=batch_size,
        filter_size=filter_size,
        is_multi_scale=is_multi_scale,
        height=height,
        width=width,
        use_interpolation=use_interpolation,
        use_hidden_layer=use_hidden_layer,
    )

    # create a function to compute the mistakes that are made by the model
    if is_multi_scale is True:
        test_model = theano.function([x_by_1, x_by_2, x_by_4],
                                     layer4.y_prediction)
    else:
        test_model = theano.function([x_by_1], layer4.y_prediction)

    if is_multi_scale is True:
        op = test_model(img_by_1, img_by_2, img_by_4)
    else:
        op = test_model(img_by_1)

    y = theano.tensor.reshape(op, (batch_size, height, width))
    return y.eval()
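The character positions 11, 13 and 15 tested above line up with the 'M'/'S', 'H'/'L' and 'I'/'N' flags that train_CNN_mini_batch (Example #17 below) writes into the model directory name; a small self-contained check of that indexing with a purely illustrative path:

# 'models/CNN_' is 11 characters, so the flags land at indices 11, 13, 15.
sample_dir = 'models/CNN_M_H_I_16_20_20_32_...'
print(sample_dir[11], sample_dir[13], sample_dir[15])  # -> M H I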
Example #17
def train_CNN_mini_batch(learning_rate, n_epochs, num_kernels, batch_size,
                         filter_size, is_multi_scale, num_of_classes, height,
                         width, use_interpolation, use_hidden_layer):
    train_set_x_by_1, train_set_y, valid_set_x_by_1, valid_set_y, test_set_x_by_1, test_set_y, train_set_x_by_2, \
    train_set_x_by_4, valid_set_x_by_2, valid_set_x_by_4, test_set_x_by_2, test_set_x_by_4 \
        = load_processed_img_data()

    n_train_batches = train_set_x_by_1.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x_by_1.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x_by_1.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    index = theano.tensor.lscalar()
    x_by_1 = theano.tensor.ftensor4('x_by_1')
    x_by_2 = theano.tensor.ftensor4('x_by_2')
    x_by_4 = theano.tensor.ftensor4('x_by_4')

    y = theano.tensor.ivector('y')

    print('... initialize the model')

    cnn_dir = 'models/CNN_'
    if is_multi_scale is True:
        cnn_dir += 'M_'
    else:
        cnn_dir += 'S_'

    if use_hidden_layer is True:
        cnn_dir += 'H_'
    else:
        cnn_dir += 'L_'

    if use_interpolation is True:
        cnn_dir += 'I_'
    else:
        cnn_dir += 'N_'

    cnn_dir = cnn_dir + str(num_kernels[0]) + '_' + str(
        num_kernels[1]) + '_' + str(
            num_kernels[2]) + '_' + str(batch_size) + '_'
    curr_date = str(datetime.date.today())
    curr_date = curr_date.replace('-', '_')
    cnn_dir = cnn_dir + curr_date + str(time.strftime('_%H_%M_%S'))

    print 'CNN model is ', cnn_dir

    if not os.path.exists(cnn_dir):
        os.makedirs(cnn_dir)
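
    # mirror everything printed to stdout into a log file inside the model directory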

    class Logger(object):
        def __init__(self):
            self.terminal = sys.stdout
            self.log = open(cnn_dir + '/log.txt', 'w')

        def write(self, message):
            self.terminal.write(message)
            self.log.write(message)

        def flush(self):
            # keep the file-like interface complete for code that flushes stdout
            self.terminal.flush()
            self.log.flush()

    sys.stdout = Logger()

    layer0 = CNN_Layer(
        name='Layer_0',
        W=None,
        b=None,
        filter_shape=(num_kernels[0], 3, filter_size, filter_size),
    )

    layer1 = CNN_Layer(
        name='Layer_1',
        W=None,
        b=None,
        filter_shape=(num_kernels[1], num_kernels[0], filter_size,
                      filter_size),
    )

    layer2 = CNN_Layer(
        name='Layer_2',
        W=None,
        b=None,
        filter_shape=(num_kernels[2], num_kernels[1], filter_size,
                      filter_size),
    )

    layer3 = HiddenLayer(
        name='Layer_3',
        W=None,
        b=None,
        n_in=(num_kernels[2] * 3 if is_multi_scale is True else num_kernels[2]),
        n_out=(num_kernels[2] * 4 if is_multi_scale is True else num_kernels[2] * 2),
        activation=theano.tensor.tanh,
    )
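
    # the classifier's input width depends on whether multi-scale features and
    # the extra hidden layer are in use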

    if is_multi_scale and use_hidden_layer:
        layer4_in = num_kernels[2] * 4
    elif is_multi_scale and not use_hidden_layer:
        layer4_in = num_kernels[2] * 3
    elif not is_multi_scale and use_hidden_layer:
        layer4_in = num_kernels[2] * 2
    else:
        layer4_in = num_kernels[2]

    layer4 = LogisticRegression(
        name='Layer_4',
        W=None,
        b=None,
        n_in=layer4_in,
        n_out=num_of_classes,
    )

    forward_propagation(layer0=layer0,
                        layer1=layer1,
                        layer2=layer2,
                        layer3=layer3,
                        layer4=layer4,
                        x_by_1=x_by_1,
                        x_by_2=x_by_2,
                        x_by_4=x_by_4,
                        num_kernels=num_kernels,
                        batch_size=batch_size,
                        filter_size=filter_size,
                        is_multi_scale=is_multi_scale,
                        height=height,
                        width=width,
                        use_interpolation=use_interpolation,
                        use_hidden_layer=use_hidden_layer)
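
    # L2 weight-decay term over the weight matrices; layer3 is excluded when the
    # hidden layer is not part of the network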

    if use_hidden_layer is True:
        L2_norm = (layer4.W**2).sum() + (layer3.W**2).sum() + (
            layer2.W**2).sum() + (layer1.W**2).sum() + (layer0.W**2).sum()
    else:
        L2_norm = (layer4.W**2).sum() + (layer2.W**2).sum() + (
            layer1.W**2).sum() + (layer0.W**2).sum()

    regularization = 0.00001
    cost = layer4.negative_log_likelihood(y) + (regularization * L2_norm)

    if is_multi_scale is True:
        test_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1:
                test_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                x_by_2:
                test_set_x_by_2[index * batch_size:(index + 1) * batch_size],
                x_by_4:
                test_set_x_by_4[index * batch_size:(index + 1) * batch_size],
                y:
                test_set_y[index * batch_size * height * width:(index + 1) *
                           batch_size * height * width]
            })
    else:
        test_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1:
                test_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                y:
                test_set_y[index * batch_size * height * width:(index + 1) *
                           batch_size * height * width]
            })

    if is_multi_scale is True:
        validate_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1:
                valid_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                x_by_2:
                valid_set_x_by_2[index * batch_size:(index + 1) * batch_size],
                x_by_4:
                valid_set_x_by_4[index * batch_size:(index + 1) * batch_size],
                y:
                valid_set_y[index * batch_size * height * width:(index + 1) *
                            batch_size * height * width]
            })
    else:
        validate_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1:
                valid_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                y:
                valid_set_y[index * batch_size * height * width:(index + 1) *
                            batch_size * height * width]
            })

    if use_hidden_layer is True:
        params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
    else:
        params = layer4.params + layer2.params + layer1.params + layer0.params

    grads = theano.tensor.grad(cost, params)
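
    # plain mini-batch SGD: step each parameter against its gradient, scaled by
    # the learning rate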

    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    if is_multi_scale is True:
        train_model = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x_by_1:
                train_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                x_by_2:
                train_set_x_by_2[index * batch_size:(index + 1) * batch_size],
                x_by_4:
                train_set_x_by_4[index * batch_size:(index + 1) * batch_size],
                y:
                train_set_y[index * batch_size * width * height:(index + 1) *
                            batch_size * width * height]
            })
    else:
        train_model = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x_by_1:
                train_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                y:
                train_set_y[index * batch_size * width * height:(index + 1) *
                            batch_size * width * height]
            })

    print '... training the model'
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience / 2)

    best_layer_0_W = numpy.zeros_like(layer0.W.get_value())
    best_layer_0_b = numpy.zeros_like(layer0.b.get_value())
    best_layer_1_W = numpy.zeros_like(layer1.W.get_value())
    best_layer_1_b = numpy.zeros_like(layer1.b.get_value())
    best_layer_2_W = numpy.zeros_like(layer2.W.get_value())
    best_layer_2_b = numpy.zeros_like(layer2.b.get_value())
    best_layer_3_W = numpy.zeros_like(layer3.W.get_value())
    best_layer_3_b = numpy.zeros_like(layer3.b.get_value())
    best_layer_4_W = numpy.zeros_like(layer4.W.get_value())
    best_layer_4_b = numpy.zeros_like(layer4.b.get_value())

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        for mini_batch_index in xrange(n_train_batches):

            start = time.clock()
            iter = (epoch - 1) * n_train_batches + mini_batch_index
            cost_ij = train_model(mini_batch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, mini-batch %i/%i, validation error %f %%' %
                      (epoch, mini_batch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # save best filters
                    best_layer_0_W = layer0.W.get_value()
                    best_layer_0_b = layer0.b.get_value()
                    best_layer_1_W = layer1.W.get_value()
                    best_layer_1_b = layer1.b.get_value()
                    best_layer_2_W = layer2.W.get_value()
                    best_layer_2_b = layer2.b.get_value()
                    best_layer_3_W = layer3.W.get_value()
                    best_layer_3_b = layer3.b.get_value()
                    best_layer_4_W = layer4.W.get_value()
                    best_layer_4_b = layer4.b.get_value()

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]

                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, mini-batch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, mini_batch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

            print 'training @ iter = %d, time taken = %f' % (iter,
                                                             (time.clock() -
                                                              start))

    end_time = time.clock()
    print('Optimization complete.')
    print(
        'Best validation score of %f %% obtained at iteration %i, '
        'with test performance %f %%' %
        (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    if not os.path.exists(cnn_dir + '/params'):
        os.makedirs(cnn_dir + '/params')

    numpy.save(cnn_dir + '/params/layer_0_W.npy', best_layer_0_W)
    numpy.save(cnn_dir + '/params/layer_0_b.npy', best_layer_0_b)
    numpy.save(cnn_dir + '/params/layer_1_W.npy', best_layer_1_W)
    numpy.save(cnn_dir + '/params/layer_1_b.npy', best_layer_1_b)
    numpy.save(cnn_dir + '/params/layer_2_W.npy', best_layer_2_W)
    numpy.save(cnn_dir + '/params/layer_2_b.npy', best_layer_2_b)
    numpy.save(cnn_dir + '/params/layer_3_W.npy', best_layer_3_W)
    numpy.save(cnn_dir + '/params/layer_3_b.npy', best_layer_3_b)
    numpy.save(cnn_dir + '/params/layer_4_W.npy', best_layer_4_W)
    numpy.save(cnn_dir + '/params/layer_4_b.npy', best_layer_4_b)
    numpy.save(cnn_dir + '/params/filer_kernels.npy', num_kernels)
    numpy.save(cnn_dir + '/params/filter_size.npy', filter_size)

    return cnn_dir
예제 #18
0
def evaluate_model(learning_rate=0.001,
                   n_epochs=100,
                   nkerns=[16, 40, 50, 60],
                   batch_size=20):
    """ 
    Network for classification; evaluates a previously trained and saved model

    :type learning_rate: float
    :param learning_rate: this is the initial learning rate used
                            (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: the batch size for training
    """

    print("Evaluating model")

    rng = numpy.random.RandomState(23455)

    # loading the data
    datasets = load_test_data(1)

    valid_set_x, valid_set_y = datasets[0]
    test_set_x, test_set_y = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    # the parameters were saved as a single object array, so allow_pickle is needed
    loaded_params = numpy.load('../saved_models/model1.npy', allow_pickle=True)
    layer4_W, layer4_b, layer3_W, layer3_b, layer2_W, layer2_b, layer1_W, layer1_b, layer0_W, layer0_b = loaded_params
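    # presumably the parameters were stored from the top (logistic) layer down to
    # the first convolutional layer, matching the unpacking order above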

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('Building the model...')

    # Reshape matrix of rasterized images of shape (batch_size, 64 * 88)
    # to a 4D tensor, compatible with our MyConvPoolLayer
    # (64, 88) is the size of the input images.
    layer0_input = x.reshape((batch_size, 1, 64, 88))

    # Construct the first convolutional pooling layer:
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (64/2, 88/2) = (32, 44)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 32, 44)
    layer0 = MyConvPoolLayer(rng,
                             input=layer0_input,
                             image_shape=(batch_size, 1, 64, 88),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[0], 1, 5, 5),
                             poolsize=(2, 2),
                             W=layer0_W,
                             b=layer0_b)

    # Construct the second convolutional pooling layer:
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (32/2, 44/2) = (16, 22)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 22)
    layer1 = MyConvPoolLayer(rng,
                             input=layer0.output,
                             image_shape=(batch_size, nkerns[0], 32, 44),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[1], nkerns[0], 5, 5),
                             poolsize=(2, 2),
                             W=layer1_W,
                             b=layer1_b)

    # Construct the third convolutional pooling layer
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (16/2, 22/2) = (8, 11)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 8, 11)
    layer2 = MyConvPoolLayer(rng,
                             input=layer1.output,
                             image_shape=(batch_size, nkerns[1], 16, 22),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[2], nkerns[1], 5, 5),
                             poolsize=(2, 2),
                             W=layer2_W,
                             b=layer2_b)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 8 * 11),
    # or (20, 50 * 8 * 11) = (20, 4400) with the default values.
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * 8 * 11,
                         n_out=800,
                         activation=T.tanh,
                         W=layer3_W,
                         b=layer3_b)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output,
                                n_in=800,
                                n_out=6,
                                W=layer4_W,
                                b=layer4_b)

    cost = layer4.negative_log_likelihood(y)

    predicted_output = layer4.y_pred

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    val_model_preds = theano.function(
        [index],
        layer4.prediction(),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    val_preds = [val_model_preds(i) for i in range(n_valid_batches)]

    #print(val_preds)
    #preds = numpy(val_preds)

    preds = []
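    # flatten the per-batch prediction arrays into a single list of labels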
    for pred in val_preds:
        for p in pred:
            preds.append(p)

    #preds = val_preds.reshape(valid_set_x.get_value(borrow=True).shape[0])

    actual_labels = load_test_data(1, 2)
    n = len(actual_labels)

    confusion_matrix = numpy.zeros((6, 6))
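    # rows are the actual class, columns the predicted class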

    for i in range(n):
        confusion_matrix[int(actual_labels[i])][preds[i]] += 1

    print(confusion_matrix)

    correct = 0.0
    for i in range(n):
        if (preds[i] == int(actual_labels[i])):
            correct += 1.0

    accuracy = correct / n
    print("Number of correctly classified : ", correct)
    print("Test accuracy is", accuracy * 100)
예제 #19
0
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
        # of [int] labels
        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
예제 #20
0
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10,
                 corruption_levels=[0.1, 0.1]):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.parameters = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with a different denoising autoencoders
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input_units=input_size,
                                        output_units=hidden_layers_sizes[i],
                                        nonlinear_function=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # it's arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.parameters.extend(sigmoid_layer.parameters)

            # Construct a denoising autoencoder that shares weights with this
            # layer
            dA_layer = DenoisingAutoencoder(numpy_rng=numpy_rng,
                                            theano_rng=theano_rng,
                                            n_visible=input_size,
                                            n_hidden=hidden_layers_sizes[i],
                                            W=sigmoid_layer.weights,
                                            bhid=sigmoid_layer.biases)
            self.dA_layers.append(dA_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticClassifier(input_units=hidden_layers_sizes[-1],
                                           output_units=n_outs)

        self.parameters.extend(self.logLayer.parameters)
예제 #21
0
def evaluate_lenet5(learning_rate=0.1, momentum=0.9, n_epochs=500,
                    dataset='mnist', depth = 1, augment_data = False,
                    nkerns=[20, 50, 100], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type depth: int
    :param depth: number of feature maps in the input, e.g. 3 for an RGB image

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset, augment_data)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Set the initial dimensions of the input images
    if dataset == 'mnist':
        in_dim = 28
    elif dataset == 'cifar-10':
        in_dim = 32


    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, depth, in_dim, in_dim))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # pooling is disabled here (poolsize = (1, 1)), so the size stays (24, 24)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 24, 24)
    filter_dim = 5
    pool_dim = 1

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, depth, in_dim, in_dim),
        filter_shape=(nkerns[0], depth, filter_dim, filter_dim),
        poolsize=(pool_dim, pool_dim)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (24-5+1, 24-5+1) = (20, 20)
    # pooling is again disabled (poolsize = (1, 1)), so the size stays (20, 20)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 20, 20)
    in_dim = (in_dim - filter_dim + 1) // pool_dim  # integer division keeps the shape an int
    filter_dim = 5
    pool_dim = 1

    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], in_dim, in_dim),
        filter_shape=(nkerns[1], nkerns[0], filter_dim, filter_dim),
        poolsize=(pool_dim, pool_dim)
    )

    # Construct the third convolutional layer with no pooling
    # filtering reduces the image size to (20-3+1, 20-3+1) = (18, 18)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 18, 18)
    in_dim = (in_dim - filter_dim + 1) // pool_dim
    filter_dim = 3
    pool_dim = 1

    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], in_dim, in_dim),
        filter_shape=(nkerns[2], nkerns[1], filter_dim, filter_dim),
        poolsize=(pool_dim, pool_dim)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 18 * 18),
    # or (500, 100 * 18 * 18) = (500, 32400) with the default values.
    layer3_input = layer2.output.flatten(2)
    # construct a fully-connected rectifier layer
    in_dim = (in_dim - filter_dim + 1) // pool_dim

    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=nkerns[2] * in_dim * in_dim,
        n_out=500,
        activation=relu
    )

    # classify the values of the fully-connected rectifier layer
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
            
    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs. Note that train_model below actually uses
    # momentum updates from gradient_updates_momentum, so this plain SGD
    # updates list is unused.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=gradient_updates_momentum(cost, params, learning_rate, momentum),
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        learning_rate = set_learning_rate(learning_rate, epoch, dataset)
        momentum = set_momentum(momentum, epoch)

        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)

            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute the average training cost; note that calling
                # train_model here also applies extra parameter updates
                train_losses = [train_model(i) for i
                                     in range(n_train_batches)]
                this_train_loss = numpy.mean(train_losses)
                print('     epoch %i, minibatch %i/%i, train cost %f\n' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_train_loss))

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('     epoch %i, minibatch %i/%i, validation error %f %%\n' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%\n') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break



    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
예제 #22
0
def evaluate_model(learning_rate=0.005,
                   n_epochs=50,
                   nkerns=[16, 40, 50, 60],
                   batch_size=32):
    """ 
    Network for classification 

    :type learning_rate: float
    :param learning_rate: this is the initial learning rate used
                            (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: the batch size for training
    """

    print("Evaluating model")

    rng = numpy.random.RandomState(23455)

    # loading the data
    datasets = load_data(3)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('Building the model...')

    layer0_input = x.reshape((batch_size, 1, 64, 88))

    layer0 = MyConvPoolLayer(rng,
                             input=layer0_input,
                             image_shape=(batch_size, 1, 64, 88),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[0], 1, 5, 5),
                             poolsize=(2, 2))

    layer1 = MyConvPoolLayer(rng,
                             input=layer0.output,
                             image_shape=(batch_size, nkerns[0], 32, 44),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[1], nkerns[0], 5, 5),
                             poolsize=(2, 2))

    layer2 = MyConvPoolLayer(rng,
                             input=layer1.output,
                             image_shape=(batch_size, nkerns[1], 16, 22),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[2], nkerns[1], 5, 5),
                             poolsize=(2, 2))

    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * 8 * 11,
                         n_out=800,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=800, n_out=6)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    predicted_output = layer4.y_pred

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # the learning rate for batch SGD (adaptive learning rate)
    l_rate = T.scalar('l_rate', dtype=theano.config.floatX)
    adaptive_learning_rate = T.scalar('adaptive_learning_rate',
                                      dtype=theano.config.floatX)
    # the momentum SGD
    momentum = T.scalar('momentum', dtype=theano.config.floatX)
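
    # note: train_model below takes index, l_rate and momentum as inputs; the
    # symbolic adaptive_learning_rate above is unused (the name is re-bound to a
    # plain Python float before training)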

    # train_model updates the model parameters by momentum SGD: each parameter
    # keeps a velocity (previous_step) that is decayed by `momentum` and pushed
    # against the current gradient, and the parameter then moves along that
    # velocity. The updates list below is built by looping over all parameters
    # automatically.
    updates = []
    for param in params:
        previous_step = theano.shared(param.get_value() * 0.,
                                      broadcastable=param.broadcastable)
        step = momentum * previous_step - l_rate * T.grad(cost, param)
        updates.append((previous_step, step))
        updates.append((param, param + step))

    train_model = theano.function(
        [index, l_rate, momentum],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('Training...')
    # early-stopping parameters
    patience = 50000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # initializing the adaptive leaning rate
    adaptive_learning_rate = learning_rate
    # initializing the momentum
    momentum = 0.1
    a = 0.0001
    b = 0.3

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        if epoch % 5 == 0:
            # decrease the learning rate every 5 epochs
            adaptive_learning_rate = 0.95 * adaptive_learning_rate
            # optionally increase the momentum as well (currently disabled)
            #momentum = 1.005 * momentum

        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index, adaptive_learning_rate,
                                  momentum)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # increase the learning rate by small amount (adaptive)
                    adaptive_learning_rate += a

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    #Save the model
                    print("Saving model")
                    save_filename = "../saved_models/model3"

                    # gather the current best parameters, top layer first; use a
                    # distinct name so the symbolic variable `x` is not shadowed
                    saved_params = numpy.array([
                        layer4.W.get_value(),
                        layer4.b.get_value(),
                        layer3.W.get_value(),
                        layer3.b.get_value(),
                        layer2.W.get_value(),
                        layer2.b.get_value(),
                        layer1.W.get_value(),
                        layer1.b.get_value(),
                        layer0.W.get_value(),
                        layer0.b.get_value()
                    ], dtype=object)

                    numpy.save(save_filename, saved_params)

                    # f = file(save_filename, 'wb')
                    # # cPickle.dump([param.get_value() for param in params], f, protocol=cPickle.HIGHEST_PROTOCOL)
                    # cPickle.dump([param.get_value() for param in params], f, protocol=cPickle.HIGHEST_PROTOCOL)
                    # # cPickle.dump(params, f, protocol=cPickle.HIGHEST_PROTOCOL)

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

                else:
                    # decrease the learning rate by small amount (adaptive)
                    adaptive_learning_rate = adaptive_learning_rate - (
                        b * adaptive_learning_rate) + (0.01 * a)

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' %
         ((end_time - start_time) / 60.)),
        file=sys.stderr)
예제 #23
0
    def __init__(self,
                 corpus,
                 n_emb,
                 n_hidden,
                 pooling,
                 rng=None,
                 th_rng=None,
                 load_from=None,
                 gensim_w2v=None):
        self.corpus = corpus
        self.n_emb = n_emb
        self.n_hidden = n_hidden
        self.pooling = pooling
        assert pooling in ('mean', 'max')

        if rng is None:
            rng = np.random.RandomState(1226)
        if th_rng is None:
            th_rng = RandomStreams(1226)

        # x/mask: (batch size, nsteps)
        x = T.matrix('x', dtype='int32')
        mask = T.matrix('mask', dtype=theano.config.floatX)
        y = T.vector('y', dtype='int32')
        batch_idx_seq = T.vector('index', dtype='int32')
        use_noise = theano.shared(th_floatX(0.))
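        # use_noise presumably toggles dropout: 1 during training, 0 at test time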
        self.x, self.mask, self.y, self.batch_idx_seq, self.use_noise = x, mask, y, batch_idx_seq, use_noise

        # TRANSPOSE THE AXIS!
        trans_x, trans_mask = x.T, mask.T
        # truncate the unused data
        trunc_x, trunc_mask = RNNModel.trunc_inputs_mask(trans_x, trans_mask)
        n_steps, n_samples = trunc_x.shape

        # list of model layers
        model_layers = []
        model_layers.append(
            EmbLayer(trunc_x,
                     load_from=load_from,
                     rand_init_params=(rng, (corpus.dic.size, n_emb)),
                     gensim_w2v=gensim_w2v,
                     dic=corpus.dic))
        model_layers.append(
            RNNLayer(model_layers[-1].outputs,
                     trunc_mask,
                     load_from=load_from,
                     rand_init_params=(rng, (n_emb, n_hidden))))
        if pooling == 'mean':
            model_layers.append(
                MeanPoolingLayer(model_layers[-1].outputs, trunc_mask))
        else:
            model_layers.append(
                MaxPoolingLayer(model_layers[-1].outputs, trunc_mask))
        model_layers.append(
            DropOutLayer(model_layers[-1].outputs, use_noise, th_rng))
        model_layers.append(
            HiddenLayer(model_layers[-1].outputs,
                        activation=T.nnet.softmax,
                        load_from=load_from,
                        rand_init_params=(rng, (n_hidden, corpus.n_type))))
        self.model_layers = model_layers

        model_params = []
        for layer in model_layers:
            model_params += layer.params

        self.pred_prob = model_layers[-1].outputs
        self.pred = T.argmax(self.pred_prob, axis=1)
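        # cost: mean negative log-likelihood of the gold labels; the small offset
        # keeps the log numerically stable when a predicted probability is ~0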
        off = 1e-8
        self.cost = -T.mean(
            T.log(self.pred_prob[T.arange(n_samples), y] + off))

        # attributes with the `func` suffix are compiled Theano functions
        self.predict_func = theano.function(inputs=[x, mask],
                                            outputs=self.pred)
        self.predict_prob_func = theano.function(inputs=[x, mask],
                                                 outputs=self.pred_prob)

        grads = T.grad(self.cost, model_params)
        self.gr_updates, self.gr_sqr_updates, self.dp_sqr_updates, self.param_updates = ada_updates(
            model_params, grads)
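
        # the four update lists returned by ada_updates presumably implement an
        # AdaDelta-style optimizer (gradient and squared-gradient accumulators,
        # squared-delta accumulators, and the final parameter updates)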